Exemplo n.º 1
0
def find_vstructures(bfile, pfile, gfile, cisfile, assoc0file, window, vfile,
                     startTraitIdx, nTraits):
    """
    searching for v-structures and saving them

    input:
    bfile         :   basename handed to bedReader.BedReader
    pfile         :   phenotype file
    gfile         :   basename of the gene-gene correlation results (gfile.pv is read)
    cisfile       :   file containing cis anchors
    assoc0file    :   basename of the initial association scan (assoc0file.pv is read)
    window        :   window forwarded to the GNetLMM model
    vfile         :   basename for the v-structures (vfile.csv is passed to save_vstructures)
    startTraitIdx :   forwarded to model.find_vstructures
    nTraits       :   forwarded to model.find_vstructures
    """
    phenotypes = phenoReader.PhenoReaderFile(pfile)
    genotypes = bedReader.BedReader(bfile)
    net = gnetlmm.GNetLMM(phenotypes, genotypes, window=window)

    # attach readers over the '.pv' files of both earlier pipeline steps
    net.set_genecorr_reader(reader.FileReader(gfile + '.pv'))
    net.set_assoc0_reader(reader.FileReader(assoc0file + '.pv'))

    net.load_cis_anchors(cisfile)
    net.find_vstructures(startTraitIdx, nTraits)
    net.save_vstructures(vfile + '.csv')
Exemplo n.º 2
0
def gene_has_anchor(bfile,
                    pfile,
                    assoc0,
                    anchor_thresh,
                    anchorfile,
                    window,
                    cis=True):
    """
    tests if a gene has a cis anchor

    input:
    bfile           :   binary bed file (bfile.bed, bfile.bim and bfile.fam are required)
    pfile           :   phenotype file
    assoc0          :   basefilename of initial association scan (assoc0.pv is read)
    anchor_thresh   :   threshold for anchor-associations
    anchorfile      :   filename for saving the anchors
    window          :   maximal distance between cis-snp and gene
    cis             :   if set, look for cis-associations only (default: True)
    """
    phenotypes = phenoReaderFile.PhenoReaderFile(pfile)
    genotypes = bedReader.BedReader(bfile)
    net = gnetlmm.GNetLMM(phenotypes, genotypes, window=window)

    # anchors are derived from the p-values of the initial scan
    net.set_assoc0_reader(reader.FileReader(assoc0 + '.pv'))
    net.gene_has_anchor(anchor_thresh, cis)
    net.save_anchors(anchorfile)
Exemplo n.º 3
0
def scan(bfile, pfile, cfile, ffile, vfile, assocfile, startTraitIdx, nTraits):
    """
    updating associations based on previously found v-structures

    input:
    bfile         :   basefilename of plink file
    pfile         :   phenotype file
    cfile         :   covariance file (None: no covariance matrix is loaded)
    ffile         :   fixed effects file (None: no covariates are loaded)
    vfile         :   basename of the v-structures file (vfile.csv is loaded)
    assocfile     :   file for saving results
    startTraitIdx :   forwarded to model.update_associations
    nTraits       :   forwarded to model.update_associations
    """
    # both matrices are optional; None is handed to the model when absent
    K = np.loadtxt(cfile) if cfile is not None else None
    Covs = np.loadtxt(ffile) if ffile is not None else None

    phenotypes = phenoReader.PhenoReaderFile(pfile)
    genotypes = bedReader.BedReader(bfile)
    net = gnetlmm.GNetLMM(phenotypes, genotypes, Covs=Covs, K=K)

    net.load_vstructures(vfile + ".csv")
    net.update_associations(startTraitIdx, nTraits)
    net.save_updates(assocfile)
Exemplo n.º 4
0
def merge_assoc0_scan(assoc0file, nSnps, bfile):
    """
    merging associations files

    Block files are expected at
    <assoc0file>.startSnp_<k>.beta.matrix and <assoc0file>.startSnp_<k>.pv.matrix
    for k = 0, nSnps, 2*nSnps, ... as long as k is below the row count
    reported by the bed reader. The collected file lists are handed to
    merge_files together with <assoc0file>.beta.matrix / <assoc0file>.pv.matrix.

    input:
    assoc0file :   basename of assoc0 results
    nSnps      :   number of SNPs in each block (must be positive)
    bfile      :   basename of bed file

    raises:
    ValueError     :   nSnps is not positive
    AssertionError :   an expected block file is missing
    """
    # A zero or negative step never advances the block offset, so the loop
    # below would not terminate; reject it before any file is touched.
    if not nSnps > 0:
        raise ValueError('nSnps must be positive, got %r' % (nSnps,))

    greader = bedReader.BedReader(bfile)
    F = greader.get_nrows()

    fn_beta = []
    fn_pv = []

    f = 0
    while f < F:
        _fn = assoc0file + '.startSnp_%d' % f
        _fn_beta = '%s.beta.matrix' % _fn
        _fn_pv = '%s.pv.matrix' % _fn

        # Raised explicitly (same type and message as the former assert
        # statements) so the check is not stripped when running with -O.
        for _path in (_fn_beta, _fn_pv):
            if not os.path.exists(_path):
                raise AssertionError('File %s is missing' % (_path))

        fn_beta.append(_fn_beta)
        fn_pv.append(_fn_pv)
        f += nSnps

    merge_files(fn_beta, assoc0file + '.beta.matrix')
    merge_files(fn_pv, assoc0file + '.pv.matrix')
Exemplo n.º 5
0
    def __init__(self, bfile, dist_min=100):
        """
        setting up the object from a plink file

        input:
        bfile      :   basefilename of plink file
        dist_min   :   minimal distance between two causal SNPs (default: 100)
        """
        self.bfile = bfile
        self.dist_min = dist_min

        # SNP annotation is pulled once from the genotype reader
        genoreader = bedReader.BedReader(bfile)
        self.genoreader = genoreader
        self.chrom = genoreader.getSnpChrom()
        self.pos = genoreader.getSnpPos()
        self.rs = genoreader.getSnpIds()

        # N: number of rows of the fam table, F: number of entries in chrom
        self.N = genoreader.fam.shape[0]
        self.F = self.chrom.shape[0]
Exemplo n.º 6
0
def find_vstructures(bfile, pfile, gfile, anchorfile, assoc0file, window,
                     vfile, startTraitIdx, nTraits, corr_thresh, ind_thresh,
                     max_genes):
    """
    searching for v-structures and saving them

    input:
    bfile      :   plink file
    pfile      :   phenotype file
    gfile      :   correlation file (gfile.pv is read)
    anchorfile :   file containing anchors
    assoc0file :   file containing the results from the initial association scan
                   (assoc0file.pv is read)
    window     :   window size
    vfile      :   basename of the file for the v-structures; '.csv' is appended,
                   preceded by '.startTrait_<startTraitIdx>' if nTraits is finite

    startTraitIdx   :   index of first trait to be analyzed
    nTraits         :   number of traits to be analyzed

    corr_thresh   : q-value for calling a correlation significant
    ind_thresh    : q-value for calling a correlation not significant
    max_genes     : maximal number of genes in the conditioning set
    """
    phenotypes = phenoReaderFile.PhenoReaderFile(pfile)
    genotypes = bedReader.BedReader(bfile)
    net = gnetlmm.GNetLMM(phenotypes, genotypes, window=window,
                          thresh_corr=corr_thresh, thresh_ind=ind_thresh)

    # attach readers over the '.pv' files of both earlier pipeline steps
    net.set_genecorr_reader(reader.FileReader(gfile + '.pv'))
    net.set_assoc0_reader(reader.FileReader(assoc0file + '.pv'))

    net.load_anchors(anchorfile)
    net.find_vstructures(startTraitIdx, nTraits, max_genes)

    # a finite trait count gets the start index encoded in the file name
    out_base = vfile
    if np.isfinite(nTraits):
        out_base = vfile + ".startTrait_%d" % startTraitIdx
    net.save_vstructures(out_base + '.csv')
Exemplo n.º 7
0
def plot_power(bfile, pfile, assoc0file, assocfile, plotfile, window):
    """
    plotting ROC curves for the initial scan ('LMM') and the updated scan ('GNetLMM')

    input:
    bfile      :   basename handed to bedReader.BedReader (SNP positions and chromosomes)
    pfile      :   phenotype file; pfile.Agene is loaded as the network
    assoc0file :   basename of the initial scan (assoc0file.pv is read)
    assocfile  :   basename of the updated scan (assocfile.pv is read)
    plotfile   :   forwarded to plotting.plotROCcurve as fn
    window     :   forwarded to get_groundtruth
    """
    # -log10 p-values of both methods, keyed by the label used in the plot
    score = {
        'LMM': -np.log10(reader.FileReader(assoc0file + '.pv').getMatrix()),
        'GNetLMM': -np.log10(reader.FileReader(assocfile + '.pv').getMatrix()),
    }

    # network
    Agene = np.loadtxt(pfile + '.Agene')

    # gene annotation
    phenotypes = phenoReader.PhenoReaderFile(pfile)
    gene_start = phenotypes.getGeneStart()
    # NOTE(review): gene_end is not used below; the call is kept unchanged
    gene_end = phenotypes.getGeneEnd()
    gene_chrom = phenotypes.getGeneChrom()

    # snp annotation
    genotypes = bedReader.BedReader(bfile)
    snp_pos = genotypes.getSnpPos()
    snp_chrom = genotypes.getSnpChrom()

    P_cis, P_trans = get_groundtruth(Agene, snp_pos, snp_chrom, gene_start,
                                     gene_chrom, window)

    # entries selected by P_cis are zeroed before the evaluation
    for values in score.values():
        values[P_cis] = 0

    # receiver operator characteristics per method
    FPR, TPR = {}, {}
    for method, values in score.items():
        FPR[method], TPR[method] = roc.roc(P_trans, values)

    plotting.plotROCcurve(['LMM', 'GNetLMM'], TPR, FPR,
                          xlim=(0, 0.05), ylim=(0, 0.42), fn=plotfile)
Exemplo n.º 8
0
def marginal_genecorr(bfile, pfile, gfile):
    """
    running marginal gene-gene correlations

    Input:
    bfile        :   binary bed file (bfile.bed, bfile.bim and bfile.fam are required)
    pfile        :   phenotype file

    gfile        :   basename of output file (gfile.pv and gfile.corr are written)
    """
    phenotypes = phenoReader.PhenoReaderFile(pfile)
    genotypes = bedReader.BedReader(bfile)
    corr, pv = gnetlmm.GNetLMM(phenotypes, genotypes).marginal_gene_correlations()

    # p-values first, then correlations, each with its own number format
    outputs = (('.pv', pv, '%.4e'), ('.corr', corr, '%.4f'))
    for suffix, matrix, number_format in outputs:
        writer.Writer(gfile + suffix).writeMatrix(matrix, fmt=number_format)
Exemplo n.º 9
0
def initial_scan(bfile,
                 pfile,
                 cfile,
                 ffile,
                 assoc0file,
                 startSnpIdx=0,
                 nSnps=np.inf,
                 memory_efficient=False):
    """
    running initial scan using a standard linear mixed model

    Input:
    bfile            :   binary bed file (bfile.bed, bfile.bim and bfile.fam are required)
    pfile            :   phenotype file
    cfile            :   covariance matrix file (None: no covariance matrix is loaded)
    ffile            :   covariates file (None: no covariates are loaded)

    assoc0file       :   basename of output file (assoc0file.pv and assoc0file.beta);
                         '.startSnp_<startSnpIdx>' is appended if nSnps is finite
    startSnpIdx      :   forwarded to model.initial_scan (default: 0)
    nSnps            :   forwarded to model.initial_scan (default: np.inf)
    memory_efficient :   forwarded to model.initial_scan (default: False)
    """
    # both matrices are optional; None is handed to the model when absent
    K = np.loadtxt(cfile) if cfile is not None else None
    Covs = np.loadtxt(ffile) if ffile is not None else None

    # a finite block size gets the start index encoded in the output name
    out_base = assoc0file
    if np.isfinite(nSnps):
        out_base = assoc0file + ".startSnp_%d" % startSnpIdx

    phenotypes = phenoReaderFile.PhenoReaderFile(pfile)
    genotypes = bedReader.BedReader(bfile)
    net = gnetlmm.GNetLMM(phenotypes, genotypes, K=K, Covs=Covs)
    beta0, pv0 = net.initial_scan(startSnpIdx, nSnps, memory_efficient)

    # p-values first, then effect sizes, each with its own number format
    outputs = (('.pv', pv0, '%.4e'), ('.beta', beta0, '%.4f'))
    for suffix, matrix, number_format in outputs:
        writer.Writer(out_base + suffix).writeMatrix(matrix, fmt=number_format)
Exemplo n.º 10
0
def create_nice_output(vfile, bfile, pfile, assoc0file, assocfile, blockfile, outfile):
    """
    creating human readable output file

    Writes <outfile>.csv: a tab-separated table with a header line and one
    line per anchor SNP of every entry yielded by the v-structure iterator.
    Each line holds the anchor SNP, the anchor gene (empty if it cannot be
    looked up), the focal gene(s), the orthogonal gene(s), the p-value of the
    initial scan and the p-value of the GNetLMM results. BlockLMM columns and
    the variance columns are added only when the corresponding data is present.

    input:
    vfile      :   basename of the v-structures file (vfile.csv is read)
    bfile      :   basename handed to bedReader.BedReader
    pfile      :   phenotype file
    assoc0file :   basename of the initial scan (assoc0file.pv is read)
    assocfile  :   basename of the GNetLMM results (assocfile.csv is read)
    blockfile  :   basename of the BlockLMM results (blockfile.csv is read),
                   or None to leave out all BlockLMM columns
    outfile    :   basename of the output file (outfile.csv is written)
    """
    preader = phenoReaderFile.PhenoReaderFile(pfile)
    greader = bedReader.BedReader(bfile)

    snp_ids = greader.getSnpIds()
    gene_ids = preader.getGeneIds()

    vstruct = vstructures.VstructureFile(vfile + '.csv')
    assoc0Reader = reader.FileReader(assoc0file + '.pv')
    assocReader = assoc_results.AssocResultsList()
    assocReader.load_csv(assocfile + '.csv')

    blockReader = None
    if blockfile is not None:
        blockReader = assoc_results.AssocResultsList()
        blockReader.load_csv(blockfile + '.csv')

    # The context manager closes the file even if a lookup below raises;
    # previously the handle leaked on any error inside the loop.
    with open(outfile + '.csv', 'w') as f:
        if blockReader is None:
            header = "Anchor Snp\t Anchor Gene\t Focal Gene\t Orthogonal Genes\t pv(LMM) \t pv(GNetLMM)"
        else:
            header = "Anchor Snp\t Anchor Gene\t Focal Gene\t Orthogonal Genes\t pv(LMM) \t pv(GNetLMM) \t pv(BlockLMM)"

        if assocReader.var_snps is not None: header += '\t varSnp(GnetLMM)'
        if assocReader.var_covs is not None: header += '\t varCovs(GnetLMM)'
        if assocReader.var_genes is not None: header += '\t varGenes(GnetLMM)'

        if blockReader is not None:
            if blockReader.var_snps is not None: header += '\t varSnp(BlockLMM)'
            if blockReader.var_covs is not None: header += '\t varCovs(BlockLMM)'
            if blockReader.var_genes is not None: header += '\t varGenes(BlockLMM)'

        header += "\n"
        f.write(header)

        for idx_focal_gene, idx_anchor_snp, idx_orth_gene, idx_anchor_gene in vstruct.iterator(full=True):

            focal_gene = ",".join(gene_ids[idx_focal_gene])
            orth_gene = ",".join(gene_ids[idx_orth_gene])
            anchor_snps = snp_ids[idx_anchor_snp]

            for i, anchor_snp in enumerate(anchor_snps):

                # The anchor gene is optional: any lookup failure yields an
                # empty column. Catching Exception (not a bare except) lets
                # KeyboardInterrupt and SystemExit propagate.
                try:
                    anchor_gene = gene_ids[idx_anchor_gene[i]]
                except Exception:
                    anchor_gene = ""

                pv_lmm = assoc0Reader.getRows([idx_anchor_snp[i]])[:, idx_focal_gene][0, 0]

                # select the result rows belonging to this focal gene / anchor SNP pair
                idx_gnet = np.logical_and(assocReader.focal_gene == idx_focal_gene,
                                          assocReader.snp_anchor == idx_anchor_snp[i])
                if blockReader is not None:
                    idx_block = np.logical_and(blockReader.focal_gene == idx_focal_gene,
                                               blockReader.snp_anchor == idx_anchor_snp[i])

                if blockReader is None:
                    line = "%s\t%s\t%s\t%s\t%.4e\t%.4e" % (
                        anchor_snp, anchor_gene, focal_gene, orth_gene,
                        pv_lmm, assocReader.pv[idx_gnet])
                else:
                    line = "%s\t%s\t%s\t%s\t%.4e\t%.4e\t%.4e" % (
                        anchor_snp, anchor_gene, focal_gene, orth_gene,
                        pv_lmm, assocReader.pv[idx_gnet], blockReader.pv[idx_block])

                if assocReader.var_snps is not None: line += '\t%.4e' % assocReader.var_snps[idx_gnet]
                if assocReader.var_covs is not None: line += '\t%.4e' % assocReader.var_covs[idx_gnet]
                if assocReader.var_genes is not None: line += '\t%.4e' % assocReader.var_genes[idx_gnet]

                if blockReader is not None:
                    if blockReader.var_snps is not None: line += '\t%.4e' % blockReader.var_snps[idx_block]
                    if blockReader.var_covs is not None: line += '\t%.4e' % blockReader.var_covs[idx_block]
                    if blockReader.var_genes is not None: line += '\t%.4e' % blockReader.var_genes[idx_block]

                line += "\n"
                f.write(line)