Exemplo n.º 1
0
def NBS_single(sm_mat,
               regNet_glap,
               propNet=None,
               propNet_kernel=None,
               k=3,
               verbose=False,
               **kwargs):
    # Check for correct input data
    if type(sm_mat) != pd.DataFrame:
        raise TypeError(
            'Somatic mutation data must be given as Pandas DataFrame')
    if propNet is not None:
        if type(propNet) != nx.Graph:
            raise TypeError('Networkx graph object required for propNet')
    if regNet_glap is not None:
        if type(regNet_glap) != pd.DataFrame:
            raise TypeError(
                'netNMF regularization network laplacian (regNet_glap) must be given as Pandas DataFrame'
            )

    # Load or set subsampling parameters
    pats_subsample_p, gene_subsample_p, min_muts = 0.8, 0.8, 10
    if 'pats_subsample_p' in kwargs:
        pats_subsample_p = float(kwargs['pats_subsample_p'])
    if 'gene_subsample_p' in kwargs:
        gene_subsample_p = float(kwargs['gene_subsample_p'])
    if 'min_muts' in kwargs:
        min_muts = int(kwargs['min_muts'])

    # Subsample Data
    sm_mat_subsample = core.subsample_sm_mat(sm_mat,
                                             propNet=propNet,
                                             pats_subsample_p=pats_subsample_p,
                                             gene_subsample_p=gene_subsample_p,
                                             min_muts=min_muts)
    if verbose:
        print 'Somatic mutation data sub-sampling complete'

    # Throw exception if subsampling returned empty dataframe
    if sm_mat_subsample.shape[0] == 0:
        raise ValueError(
            'Subsampled somatic mutation matrix contains no patients.')

    # Propagate data if network object is provided
    if propNet is not None:
        # Determine if propagation is can be based on pre-computed propagation kernel
        if propNet_kernel is None:
            # If kernel is not given and some propagation parameters are given in kwargs, set propagation parameters
            # Otherwise set default values
            alpha, symmetric_norm, save_prop = 0.7, False, False
            if 'prop_alpha' in kwargs:
                alpha = float(kwargs['prop_alpha'])
            if 'prop_symmetric_norm' in kwargs:
                symmetric_norm = ((kwargs['prop_symmetric_norm'] == 'True') |
                                  (kwargs['prop_symmetric_norm'] == True))
            if 'save_prop' in kwargs:
                save_prop = ((kwargs['save_prop'] == 'True') |
                             (kwargs['save_prop'] == True))
            # Save propagation step data if desired (indicated in kwargs)
            if save_prop:
                prop_sm_data = prop.network_propagation(
                    propNet,
                    sm_mat_subsample,
                    alpha=alpha,
                    symmetric_norm=symmetric_norm,
                    **kwargs)
            else:
                prop_sm_data = prop.network_propagation(
                    propNet,
                    sm_mat_subsample,
                    alpha=alpha,
                    symmetric_norm=symmetric_norm)
        else:
            # Save propagation step data if desired (indicated in kwargs)
            save_prop = False
            if 'save_prop' in kwargs:
                save_prop = ((kwargs['save_prop'] == 'True') |
                             (kwargs['save_prop'] == True))
            if save_prop:
                prop_sm_data = prop.network_kernel_propagation(
                    propNet, propNet_kernel, sm_mat_subsample, **kwargs)
            else:
                prop_sm_data = prop.network_kernel_propagation(
                    propNet, propNet_kernel, sm_mat_subsample)
        if verbose:
            print 'Somatic mutation data propagated'
    else:
        prop_sm_data = sm_mat_subsample
        if verbose:
            print 'Somatic mutation data not propagated'

    # Quantile Normalize Data
    qnorm_data = True
    if 'qnorm_data' in kwargs:
        qnorm_data = ((kwargs['qnorm_data'] == 'True') |
                      (kwargs['qnorm_data'] == True))
    if qnorm_data:
        prop_data_qnorm = core.qnorm(prop_sm_data)
        if verbose:
            print 'Somatic mutation data quantile normalized'
    else:
        prop_data_qnorm = prop_sm_data
        if verbose:
            print 'Somatic mutation data not quantile normalized'

    # Prepare data for mixed netNMF function (align propagated profile columns with regularization network laplacian rows)
    if propNet is not None:
        propNet_nodes = list(propNet.nodes)
        data_arr = np.array(prop_data_qnorm.T.ix[propNet_nodes])
        regNet_glap_arr = np.array(
            regNet_glap.ix[propNet_nodes][propNet_nodes])
    else:
        propNet_nodes = list(regNet_glap.index)
        data_arr = np.array(prop_data_qnorm.T.ix[propNet_nodes].fillna(0))
        regNet_glap_arr = np.array(regNet_glap)

    # Set netNMF parameters from kwargs if given, otherwise use defaults
    netNMF_lambda, netNMF_maxiter, netNMF_verbose = 200, 250, False
    netNMF_eps, netNMF_err_tol, netNMF_err_delta_tol = 1e-15, 1e-4, 1e-8
    if 'netNMF_lambda' in kwargs:
        netNMF_lambda = float(kwargs['netNMF_lambda'])
    if 'netNMF_maxiter' in kwargs:
        netNMF_maxiter = int(kwargs['netNMF_maxiter'])
    if 'netNMF_eps' in kwargs:
        netNMF_eps = float(kwargs['netNMF_eps'])
    if 'netNMF_err_tol' in kwargs:
        netNMF_err_tol = float(kwargs['netNMF_err_tol'])
    if 'netNMF_err_delta_tol' in kwargs:
        netNMF_err_delta_tol = float(kwargs['netNMF_err_delta_tol'])

    # Mixed netNMF Result
    W, H, numIter, finalResid = core.mixed_netNMF(
        data_arr,
        regNet_glap_arr,
        k=k,
        l=netNMF_lambda,
        maxiter=netNMF_maxiter,
        eps=netNMF_eps,
        err_tol=netNMF_err_tol,
        err_delta_tol=netNMF_err_delta_tol,
        verbose=False)

    # Return netNMF result (dimension-reduced propagated patient profiles)
    H_df = pd.DataFrame(H.T, index=prop_data_qnorm.index)

    # Save netNMF result
    # Saving the propagation result
    if 'outdir' in kwargs:
        if 'job_name' in kwargs:
            if 'iteration_label' in kwargs:
                save_path = kwargs['outdir'] + str(
                    kwargs['job_name']) + '_H_' + str(
                        kwargs['iteration_label']) + '.csv'
            else:
                save_path = kwargs['outdir'] + str(
                    kwargs['job_name']) + '_H.csv'
        else:
            if 'iteration_label' in kwargs:
                save_path = kwargs['outdir'] + 'H_' + str(
                    kwargs['iteration_label']) + '.csv'
            else:
                save_path = kwargs['outdir'] + 'H.csv'
        H_df.to_csv(save_path)
        if verbose:
            print 'H matrix saved:', save_path
    else:
        pass
    if verbose:
        print 'pyNBS iteration complete'
    return H_df
Exemplo n.º 2
0
def NBS_single(sm_mat,
               options,
               propNet=None,
               propNet_kernel=None,
               regNet_glap=None,
               verbose=True,
               save_path=None):
    # Set default NBS netNMF options
    NBS_options = {
        'pats_subsample_p': 0.8,
        'gene_subsample_p': 0.8,
        'min_muts': 10,
        'prop_data': True,
        'prop_alpha': 0.7,
        'prop_symmetric_norm': False,
        'qnorm_data': True,
        'netNMF_k': 4,
        'netNMF_gamma': 200,
        'netNMF_update_gamma': False,
        'netNMF_gamma_factor': 1,
        'netNMF_niter': 250,
        'netNMF_eps': 1e-15,
        'netNMF_err_tol': 1e-4,
        'netNMF_err_delta_tol': 1e-4
    }

    # Update NBS netNMF options
    for option in options:
        NBS_options[option] = options[option]
    if verbose:
        print 'NBS options set:'
        for option in NBS_options:
            print '\t', option + ':', NBS_options[option]

    # Check for correct input data
    if NBS_options['prop_data']:
        if type(propNet) != nx.Graph:
            raise TypeError('Networkx graph object required for propNet')
    if (NBS_options['netNMF_gamma'] != 0):
        if type(regNet_glap) != pd.DataFrame:
            raise TypeError(
                'netNMF regularization network laplacian (regNet_glap) must be given as Pandas DataFrame'
            )

    # Subsample Data
    sm_mat_subsample = core.subsample_sm_mat(
        sm_mat,
        propNet=propNet,
        pats_subsample_p=NBS_options['pats_subsample_p'],
        gene_subsample_p=NBS_options['gene_subsample_p'],
        min_muts=NBS_options['min_muts'])
    if verbose:
        print 'Somatic mutation data sub-sampling complete'

    # Propagate Data
    if NBS_options['prop_data']:
        if propNet_kernel is None:
            prop_sm_data = prop.network_propagation(
                propNet,
                sm_mat_subsample,
                symmetric_norm=NBS_options['prop_symmetric_norm'],
                alpha=NBS_options['prop_alpha'],
                verbose=verbose)
        else:
            prop_sm_data = prop.network_kernel_propagation(propNet,
                                                           propNet_kernel,
                                                           sm_mat_subsample,
                                                           verbose=verbose,
                                                           save_path=None)
        if verbose:
            print 'Somatic mutation data propagated'
    else:
        prop_sm_data = sm_mat_subsample
        print 'Somatic mutation data not propagated'

    # Quantile Normalize Data
    if NBS_options['qnorm_data']:
        prop_data_qnorm = core.qnorm(prop_sm_data)
        if verbose:
            print 'Somatic mutation data quantile normalized'
    else:
        prop_data_qnorm = prop_sm_data
        print 'Somatic mutation data not quantile normalized'

    # Prepare data for mixed netNMF function
    propNet_nodes = propNet.nodes()
    data_arr = np.array(prop_data_qnorm.T.ix[propNet_nodes])
    regNet_glap_arr = np.array(regNet_glap.ix[propNet_nodes][propNet_nodes])
    # Mixed netNMF Result
    W, H, numIter, finalResid = core.mixed_netNMF(
        data_arr,
        regNet_glap_arr,
        NBS_options['netNMF_k'],
        W_init=None,
        H_init=None,
        gamma=NBS_options['netNMF_gamma'],
        update_gamma=NBS_options['netNMF_update_gamma'],
        gamma_factor=NBS_options['netNMF_gamma_factor'],
        niter=NBS_options['netNMF_niter'],
        eps=NBS_options['netNMF_eps'],
        err_tol=NBS_options['netNMF_err_tol'],
        err_delta_tol=NBS_options['netNMF_err_delta_tol'],
        verbose=verbose,
        debug_mode=False)

    # Save netNMF Result
    H_df = pd.DataFrame(H.T, index=prop_data_qnorm.index)
    if save_path is not None:
        H_df.to_csv(save_path)
        if verbose:
            print 'netNMF result saved:', save_path
    return H_df