예제 #1
0
def remote_site(args, computation_phase):
    """Embed the shared (remote) data with t-SNE and record where it was saved.

    Args:
        args (dict): Read keys are
            "shared_X" (path passed to ``np.loadtxt``),
            "no_dims" (number of embedding columns to initialise),
            "initial_dims" and "perplexity" (forwarded to ``tsne``).
        computation_phase: Forwarded unchanged to ``tsne`` as the
            ``computation_phase`` keyword.

    Returns:
        dict: The very same ``args`` object, mutated so that
        ``args["shared_Y"]`` is the file name "Y_values.txt".

    Side effects:
        Writes "Y_values.txt" in the current working directory, one line per
        embedded row, holding the first two columns separated by a tab.
    """
    raw_X = np.loadtxt(args["shared_X"])
    target_dims = args["no_dims"]
    pca_dims = args["initial_dims"]
    perplexity_value = args["perplexity"]

    scaled_X = normalize_columns(raw_X)
    # Unpacking into two names keeps the implicit "must be 2-D" check.
    row_count, _column_count = scaled_X.shape

    # Random starting positions for the low dimensional embedding.
    start_Y = np.random.randn(row_count, target_dims)

    embedding = tsne(
        scaled_X,
        start_Y,
        row_count,
        target_dims,
        pca_dims,
        perplexity_value,
        computation_phase=computation_phase)

    out_name = "Y_values.txt"
    with open(out_name, "w") as sink:
        # Only columns 0 and 1 are persisted, whatever no_dims was.
        sink.writelines(
            str(point[0]) + '\t' + str(point[1]) + '\n'
            for point in embedding)

    args["shared_Y"] = out_name
    return args
예제 #2
0
def _write_xy_rows(path, rows):
    """Write the first two columns of every row in ``rows`` to ``path``.

    Each row becomes one line of the form ``<col0>\\t<col1>\\n`` using ``str()``
    on the values.
    """
    with open(path, "w") as handle:
        handle.writelines(
            str(row[0]) + '\t' + str(row[1]) + '\n' for row in rows)


def local_site1(args, compAvgError, computation_phase):
    """Run one phase of the local-site computation.

    State is carried between calls on attributes of the function object
    itself (``local_site1.Y``, ``.dY``, ``.iY``, ``.gains``, ``.P``, ``.n``,
    ``.sharedRows``, ``.sharedColumns``, ``.Q``): the 'local' phase creates
    them, the 'computation' and 'final' phases read and update them.

    Args:
        args (dict): For 'local': keys "shared_X" and "shared_Y" (paths for
            ``np.loadtxt``), "no_dims", "initial_dims", "perplexity".
            For 'computation': key "shared_Y". Not read for 'final'.
        compAvgError (str): JSON text, only read in the 'computation' phase,
            with ``output.error``, ``output.avgX`` and ``output.avgY``.
        computation_phase (str): 'local', 'computation' or 'final'. Compared
            by value (``==``); the previous identity comparison (``is``)
            silently matched nothing for equal-but-not-interned strings.

    Returns:
        'local': ``{"localSite1SharedY": "site1SharedY.txt"}``.
        'computation': tuple of
            ``{"localSite1SharedY": "site1SharedY.txt"}``,
            ``{"localSite1SharedIY": "site1SharedIY.txt"}`` and a JSON string
            with ``output.MeanX``, ``output.MeanY`` and ``output.error``.
        'final': ``{"localSite1": "local_site1Y.txt"}``.
        Any other phase: ``None``.

    Side effects:
        Writes "site1SharedY.txt", "site1SharedIY.txt" or "local_site1Y.txt"
        in the current working directory, depending on the phase.
    """
    # One parser serves every phase; it only decodes JSON passed via --run.
    parser = argparse.ArgumentParser(
        description='''read in coinstac args for local computation''')
    parser.add_argument('--run', type=json.loads, help='grab coinstac args')

    if computation_phase == 'local':
        shared_X = np.loadtxt(args["shared_X"])
        shared_Y = np.loadtxt(args["shared_Y"])
        no_dims = args["no_dims"]
        initial_dims = args["initial_dims"]
        perplexity = args["perplexity"]
        local_site1.sharedRows, local_site1.sharedColumns = shared_X.shape

        # The local data file names are hard-coded here.
        localSite1_Data = ''' {
            "site1_Data": "Site_1_Mnist_X.txt",
            "site1_Label": "Site_1_Label.txt"
        } '''
        site1args = parser.parse_args(['--run', localSite1_Data])
        Site1Data = np.loadtxt(site1args.run["site1_Data"])

        # Shared rows go on top, local rows below.
        combined_X = np.concatenate((shared_X, Site1Data), axis=0)
        combined_X = normalize_columns(combined_X)

        # Random start, then seed the shared rows with the remote embedding.
        combined_Y = np.random.randn(combined_X.shape[0], no_dims)
        combined_Y[:shared_Y.shape[0], :] = shared_Y

        (local_site1.Y, local_site1.dY, local_site1.iY, local_site1.gains,
         local_site1.P, local_site1.n) = tsne(
            combined_X,
            combined_Y,
            local_site1.sharedRows,
            no_dims=no_dims,
            initial_dims=initial_dims,
            perplexity=perplexity,
            computation_phase=computation_phase)

        # Persist the shared rows of Y for the remote.
        _write_xy_rows("site1SharedY.txt",
                       local_site1.Y[:local_site1.sharedRows])

        localJsonY = ''' {"localSite1SharedY": "site1SharedY.txt"} '''
        return parser.parse_args(['--run', localJsonY]).run

    if computation_phase == 'computation':
        shared_Y = np.loadtxt(args["shared_Y"])
        local_site1.Y[:local_site1.sharedRows, :] = shared_Y
        compAvgError1 = parser.parse_args(['--run', compAvgError])
        C = compAvgError1.run['output']['error']

        # Take the column means, then override the first two entries with the
        # averages supplied by the caller before demeaning.
        demeanAvg = np.mean(local_site1.Y, 0)
        demeanAvg[0] = compAvgError1.run['output']['avgX']
        demeanAvg[1] = compAvgError1.run['output']['avgY']
        local_site1.Y = demeanL(local_site1.Y, demeanAvg)

        # NOTE(review): `iter` is not defined in this function, so unless the
        # module defines its own `iter`, the builtin is what gets passed to
        # master_child. The returned iY is also discarded, so local_site1.iY
        # is not refreshed. Confirm both are intended.
        local_site1.Y, _iY, local_site1.Q, C, local_site1.P = master_child(
            local_site1.Y, local_site1.dY, local_site1.iY, local_site1.gains,
            local_site1.n, local_site1.sharedRows, local_site1.P, iter, C)
        local_site1.Y[local_site1.sharedRows:, :] = updateL(
            local_site1.Y[local_site1.sharedRows:, :],
            local_site1.iY[local_site1.sharedRows:, :])

        # Persist the shared rows of Y for the remote.
        _write_xy_rows("site1SharedY.txt",
                       local_site1.Y[:local_site1.sharedRows])
        localJson = ''' {"localSite1SharedY": "site1SharedY.txt"} '''
        sharedY = parser.parse_args(['--run', localJson])

        # Persist the shared rows of iY for the remote.
        _write_xy_rows("site1SharedIY.txt",
                       local_site1.iY[:local_site1.sharedRows])
        localJsonIY = ''' {"localSite1SharedIY": "site1SharedIY.txt"} '''
        sharedIY = parser.parse_args(['--run', localJsonIY])

        meanValue = np.mean(local_site1.Y, 0)
        comp = {'output': {'MeanX': meanValue[0],
                           'MeanY': meanValue[1],
                           'error': C}}

        return (sharedY.run, sharedIY.run,
                json.dumps(comp, sort_keys=True, indent=4,
                           separators=(',', ':')))

    if computation_phase == 'final':
        # Only the local (non-shared) rows are written out.
        _write_xy_rows("local_site1Y.txt",
                       local_site1.Y[local_site1.sharedRows:])

        localJsonY = ''' {"localSite1": "local_site1Y.txt"} '''
        return parser.parse_args(['--run', localJsonY]).run

    # Unknown phase: same result as before (implicit None), made explicit.
    return None
예제 #3
0
def local_1(args):
    """Run t-SNE on shared + local data and stage the results as .npy files.

    Args:
        args (dict): Reads
            ``args['state']['baseDirectory']`` (holds the shared arrays and
            'local_site_data.txt'),
            ``args['state']['transferDirectory']`` and
            ``args['state']['cacheDirectory']`` (output locations),
            ``args['input']['shared_X']`` / ``args['input']['shared_y']``
            (file names of the shared arrays inside the base directory),
            ``args['cache']['no_dims']``, ``['initial_dims']`` and
            ``['perplexity']``.

    Returns:
        str: JSON with an "output" section (file name of the shared part of
        the embedding plus the phase tag 'local_1') and a "cache" section
        (file names of the cached arrays, ``local_n`` and ``shared_rows``).

    Side effects:
        Saves 'local_shared_Y.npy' and 'local_shared_IY.npy' into the
        transfer directory and six arrays into the cache directory.
    """
    state = args['state']

    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects
    # from these files; confirm the transferred files are trusted.
    remote_X = np.load(
        os.path.join(state['baseDirectory'], args['input']['shared_X']),
        allow_pickle=True)
    remote_Y = np.load(
        os.path.join(state['baseDirectory'], args['input']['shared_y']),
        allow_pickle=True)

    cached = args["cache"]
    target_dims = cached["no_dims"]
    pca_dims = cached["initial_dims"]
    perplexity_value = cached["perplexity"]
    n_shared, _n_features = remote_X.shape

    site_file = os.path.join(args["state"]["baseDirectory"],
                             'local_site_data.txt')
    with open(site_file) as handle:
        site_X = np.asarray(np.loadtxt(handle.readlines()))

    # Shared rows first, local rows underneath, then column-normalise.
    stacked_X = normalize_columns(
        np.concatenate((remote_X, site_X), axis=0))

    # Random start; the shared rows are seeded with the remote embedding.
    stacked_Y = np.random.randn(stacked_X.shape[0], target_dims)
    n_seeded = remote_Y.shape[0]
    stacked_Y[:n_seeded, :] = remote_Y

    local_Y, local_dY, local_iY, local_gains, local_P, local_n = tsne(
        stacked_X,
        stacked_Y,
        n_shared,
        no_dims=target_dims,
        initial_dims=pca_dims,
        perplexity=perplexity_value,
        computation_phase="local")

    shared_part_Y = local_Y[:n_seeded, :]
    shared_part_IY = local_iY[:n_seeded, :]

    # Files handed over to the remote.
    transfer_dir = args['state']['transferDirectory']
    np.save(os.path.join(transfer_dir, 'local_shared_Y.npy'), shared_part_Y)
    np.save(os.path.join(transfer_dir, 'local_shared_IY.npy'), shared_part_IY)

    # Files kept in the local cache, written in the same order as before.
    cache_dir = args['state']['cacheDirectory']
    to_cache = (
        ('local_Y.npy', local_Y),
        ('local_dY.npy', local_dY),
        ('local_IY.npy', local_iY),
        ('local_P.npy', local_P),
        ('local_gains.npy', local_gains),
        ('shared_Y.npy', remote_Y),
    )
    for file_name, array in to_cache:
        np.save(os.path.join(cache_dir, file_name), array)

    output_section = {
        "localSite1SharedY": 'local_shared_Y.npy',
        'computation_phase': 'local_1',
    }
    cache_section = {
        "local_Y": 'local_Y.npy',
        "local_dY": 'local_dY.npy',
        "local_IY": 'local_IY.npy',
        "local_P": 'local_P.npy',
        "local_n": local_n,
        "local_gains": 'local_gains.npy',
        "shared_rows": n_shared,
        "shared_Y": 'shared_Y.npy',
    }

    return json.dumps({"output": output_section, "cache": cache_section})
예제 #4
0
def local_1(args):
    """Run t-SNE on shared + local simulator data and return it inline as JSON.

    The high dimensional inputs are read from fixed paths under
    'test/input/simulatorRun/'; the shared embedding arrives in ``args``.

    Args:
        args (dict): Reads ``args["input"]["shared_y"]`` (nested list with
            the shared low dimensional embedding) and
            ``args["cache"]["no_dims"]``, ``["initial_dims"]``,
            ``["perplexity"]``.

    Returns:
        str: JSON with an "output" section (shared rows of Y and iY as nested
        lists plus the phase tag "local_1") and a "cache" section (all arrays
        returned by ``tsne`` as nested lists, ``local_n``, ``shared_rows``
        and the incoming shared embedding).
    """
    remote_X = np.loadtxt('test/input/simulatorRun/mnist2500_X.txt')
    remote_Y = np.array(args["input"]["shared_y"])

    cached = args["cache"]
    target_dims = cached["no_dims"]
    pca_dims = cached["initial_dims"]
    perplexity_value = cached["perplexity"]
    n_shared, _n_features = remote_X.shape

    site_X = np.loadtxt(
        'test/input/simulatorRun/test_high_dimensional_site_1_mnist_data.txt')

    # Shared rows first, local rows underneath, then column-normalise.
    stacked_X = normalize_columns(
        np.concatenate((remote_X, site_X), axis=0))

    # Random start; the shared rows are seeded with the remote embedding.
    stacked_Y = np.random.randn(stacked_X.shape[0], target_dims)
    n_seeded = remote_Y.shape[0]
    stacked_Y[:n_seeded, :] = remote_Y

    local_Y, local_dY, local_iY, local_gains, local_P, local_n = tsne(
        stacked_X,
        stacked_Y,
        n_shared,
        no_dims=target_dims,
        initial_dims=pca_dims,
        perplexity=perplexity_value,
        computation_phase="local")

    shared_part_Y = local_Y[:n_seeded, :]
    shared_part_IY = local_iY[:n_seeded, :]

    output_section = {
        "localSite1SharedY": shared_part_Y.tolist(),
        "localSite1SharedIY": shared_part_IY.tolist(),
        'computation_phase': "local_1",
    }
    cache_section = {
        "local_Y": local_Y.tolist(),
        "local_dY": local_dY.tolist(),
        "local_iY": local_iY.tolist(),
        "local_P": local_P.tolist(),
        "local_n": local_n,
        "local_gains": local_gains.tolist(),
        "shared_rows": n_shared,
        "shared_y": remote_Y.tolist(),
    }

    return json.dumps({"output": output_section, "cache": cache_section})
예제 #5
0
def remote_1(args):
    """Embed the simulator's shared data with t-SNE and return it as JSON.

    The shared data and labels are read from fixed paths under
    'test/input/simulatorRun/'.

    Args:
        args (dict): Reads ``args["input"]["local0"]`` for "no_dims",
            "initial_dims", "perplexity" and "max_iterations".

    Returns:
        str: JSON with
            "output": {"shared_y": embedding as nested list,
                       "computation_phase": 'remote_1'} and
            "cache": {"shared_y": same embedding,
                      "max_iterations": value taken from the input}.
    """
    raw_X = np.loadtxt('test/input/simulatorRun/mnist2500_X.txt')
    # NOTE(review): the labels are loaded but never used below; the load is
    # kept so a missing/invalid labels file still fails here as before.
    _labels = np.loadtxt('test/input/simulatorRun/mnist2500_labels.txt')

    site_params = args["input"]["local0"]
    target_dims = site_params["no_dims"]
    pca_dims = site_params["initial_dims"]
    perplexity_value = site_params["perplexity"]
    iteration_cap = site_params["max_iterations"]

    scaled_X = normalize_columns(raw_X)
    # Unpacking into two names keeps the implicit "must be 2-D" check.
    row_count, _column_count = scaled_X.shape

    # Random starting positions for the low dimensional embedding.
    start_Y = np.random.randn(row_count, target_dims)

    embedding = tsne(scaled_X,
                     start_Y,
                     row_count,
                     target_dims,
                     pca_dims,
                     perplexity_value,
                     computation_phase="remote")

    output_section = {
        "shared_y": embedding.tolist(),
        "computation_phase": 'remote_1',
    }
    cache_section = {
        "shared_y": embedding.tolist(),
        "max_iterations": iteration_cap,
    }

    return json.dumps({"output": output_section, "cache": cache_section})
예제 #6
0
def remote_1(args):
    """Embed the shared data with t-SNE and stage the arrays as .npy files.

    Args:
        args (dict): Reads
            ``args["state"]["baseDirectory"]`` (holds 'mnist2500_X.txt' and
            'mnist2500_labels.txt'),
            ``args["state"]["transferDirectory"]`` and
            ``args["state"]["cacheDirectory"]`` (output locations),
            ``args["input"]["local0"]`` for "no_dims", "initial_dims",
            "perplexity" and "max_iterations".

    Returns:
        str: JSON with
            "output": {"shared_y": 'shared_Y.npy', "shared_X": 'shared_X.npy',
                       "computation_phase": 'remote_1'} and
            "cache": {"shared_y": 'shared_Y.npy',
                      "max_iterations": value taken from the input}.

    Side effects:
        Saves 'shared_Y.npy' into both the transfer and the cache directory
        and the normalised 'shared_X.npy' into the transfer directory.
        Re-seeds NumPy's global random generator via ``np.random.seed()``.
    """
    data_path = os.path.join(args["state"]["baseDirectory"],
                             'mnist2500_X.txt')
    with open(data_path) as data_file:
        raw_X = np.loadtxt(data_file.readlines())

    labels_path = os.path.join(args["state"]["baseDirectory"],
                               'mnist2500_labels.txt')
    with open(labels_path) as labels_file:
        # NOTE(review): the labels are parsed but never used below; the load
        # is kept so a missing/invalid labels file still fails here as before.
        _labels = np.loadtxt(labels_file.readlines())

    site_params = args["input"]["local0"]
    target_dims = site_params["no_dims"]
    pca_dims = site_params["initial_dims"]
    perplexity_value = site_params["perplexity"]
    iteration_cap = site_params["max_iterations"]

    scaled_X = normalize_columns(raw_X)
    # Unpacking into two names keeps the implicit "must be 2-D" check.
    row_count, _column_count = scaled_X.shape

    # Fresh seed, then random starting positions for the embedding.
    np.random.seed()
    start_Y = np.random.randn(row_count, target_dims)

    embedding = tsne(scaled_X,
                     start_Y,
                     row_count,
                     target_dims,
                     pca_dims,
                     perplexity_value,
                     computation_phase="remote")

    # Same write order as before: Y to transfer, Y to cache, X to transfer.
    embedding_name = 'shared_Y.npy'
    np.save(
        os.path.join(args['state']['transferDirectory'], embedding_name),
        embedding)
    np.save(
        os.path.join(args['state']['cacheDirectory'], embedding_name),
        embedding)
    np.save(
        os.path.join(args['state']['transferDirectory'], 'shared_X.npy'),
        scaled_X)

    output_section = {
        "shared_y": 'shared_Y.npy',
        "shared_X": 'shared_X.npy',
        "computation_phase": 'remote_1',
    }
    cache_section = {
        "shared_y": 'shared_Y.npy',
        "max_iterations": iteration_cap,
    }

    return json.dumps({"output": output_section, "cache": cache_section})
예제 #7
0
def local_1(args):
    """Embed the local site's data together with the shared data.

    The shared arrays arrive inline in ``args``; the local data is read from
    the fixed path 'test/input/simulatorRun/site1_x.txt'.

    Args:
        args (dict): Reads ``args["input"]["shared_x"]`` and
            ``args["input"]["shared_y"]`` (array-likes converted with
            ``np.array``) and ``args["cache"]["no_dims"]``,
            ``["initial_dims"]``, ``["perplexity"]``.

    Returns:
        str: JSON with
            "output": {"computation_phase": 'local_site',
                       "local_embedding": rows of the embedding that follow
                       the shared rows, as a nested list} and
            "cache": {}.
    """
    inputs = args["input"]
    remote_X = np.array(inputs["shared_x"])
    remote_Y = np.array(inputs["shared_y"])

    cached = args["cache"]
    target_dims = cached["no_dims"]
    pca_dims = cached["initial_dims"]
    perplexity_value = cached["perplexity"]
    n_shared, _n_features = remote_X.shape

    site_X = np.loadtxt('test/input/simulatorRun/site1_x.txt')
    # Unpacking into two names keeps the implicit "must be 2-D" check.
    _site_rows, _site_columns = site_X.shape

    # Shared rows first, local rows underneath, then column-normalise.
    stacked_X = normalize_columns(
        np.concatenate((remote_X, site_X), axis=0))

    # Random start; the shared rows are seeded with the remote embedding.
    stacked_Y = np.random.randn(stacked_X.shape[0], target_dims)
    n_seeded = remote_Y.shape[0]
    stacked_Y[:n_seeded, :] = remote_Y

    # "local" here means the combined data set (shared on top of local).
    embedding = tsne(stacked_X,
                     stacked_Y,
                     n_shared,
                     no_dims=target_dims,
                     initial_dims=pca_dims,
                     perplexity=perplexity_value,
                     computation_phase='local')

    # Everything below the seeded shared rows belongs to this site.
    site_embedding = embedding[n_seeded:, :]

    output_section = {
        "computation_phase": 'local_site',
        "local_embedding": site_embedding.tolist(),
    }

    return json.dumps({"output": output_section, "cache": {}})
예제 #8
0
def local_site(args, computation_phase):
    """Embed the local site's data together with the shared data.

    Args:
        args (dict): Reads "shared_X" and "shared_Y" (paths passed to
            ``np.loadtxt``), "no_dims", "initial_dims" and "perplexity".
        computation_phase: Forwarded unchanged to ``tsne`` as the
            ``computation_phase`` keyword.

    Returns:
        dict: ``{"local": "local_site1.txt"}``.

    Side effects:
        Reads the local data from "Site_1_Mnist_X.txt" and writes
        "local_site1.txt" in the current working directory: one line per row
        after the shared rows, first two columns separated by a tab.
    """
    remote_X = np.loadtxt(args["shared_X"])
    remote_Y = np.loadtxt(args["shared_Y"])
    target_dims = args["no_dims"]
    pca_dims = args["initial_dims"]
    perplexity_value = args["perplexity"]
    n_shared, _n_features = remote_X.shape

    # The parser only serves to decode JSON text passed through --run.
    run_parser = argparse.ArgumentParser(
        description='''read in coinstac args for local computation''')
    run_parser.add_argument(
        '--run', type=json.loads, help='grab coinstac args')

    # File names of the local site data are fixed here.
    site_spec = ''' {
        "site1_Data": "Site_1_Mnist_X.txt",
        "site1_Label": "Site_1_Label.txt"
    } '''
    site_files = run_parser.parse_args(['--run', site_spec]).run
    site_X = np.loadtxt(site_files["site1_Data"])
    # Unpacking into two names keeps the implicit "must be 2-D" check.
    _site_rows, _site_columns = site_X.shape

    # Shared rows first, local rows underneath, then column-normalise.
    stacked_X = normalize_columns(
        np.concatenate((remote_X, site_X), axis=0))

    # Random start; the shared rows are seeded with the remote embedding.
    stacked_Y = np.random.randn(stacked_X.shape[0], target_dims)
    stacked_Y[:remote_Y.shape[0], :] = remote_Y

    # "local" here means the combined data set (shared on top of local).
    embedding = tsne(
        stacked_X,
        stacked_Y,
        n_shared,
        no_dims=target_dims,
        initial_dims=pca_dims,
        perplexity=perplexity_value,
        computation_phase=computation_phase)

    # Persist only the rows that follow the shared block.
    with open("local_site1.txt", "w") as sink:
        for row_index in range(n_shared, len(embedding)):
            point = embedding[row_index]
            sink.write(str(point[0]) + '\t' + str(point[1]) + '\n')

    # Hand the output file name back as a decoded JSON object.
    reply = ''' {"local": "local_site1.txt"} '''
    return run_parser.parse_args(['--run', reply]).run