Exemplo n.º 1
0
def max_cover_pdb(protein, pdbch, top=-1):
    """Greedily pick a reduced set of PDB chains with the same coverage.

    Args:
        protein: Prefix identifying the protein; only mapped residues whose
            string starts with this prefix are counted as coverage.
        pdbch: Iterable of ``(pdb, chain)`` tuples. It is not modified.
        top: Stop as soon as this many chains have been selected. The
            default ``-1`` never equals the selection count, so there is
            no limit.

    Returns:
        A list of ``(pdb, chain)`` tuples in the order they were selected.
        Each round picks the chain adding the most not-yet-covered
        residues; ties go to the chain that sorts first. Chains that add
        nothing new are never returned.
    """
    remaining = sorted(pdbch)
    from map_pdb_res import pdblist_to_uniprot

    # The code below relies on pdbmap being indexable by keys shaped like
    # "pdb:chain:..." and on its values being residue strings.
    pdbmap = pdblist_to_uniprot([pdb for pdb, ch in remaining])

    # Collect the residues of `protein` covered by each chain once, instead
    # of rescanning the whole map for every candidate in every round.
    chain_res = {}
    for pdb, ch in remaining:
        if (pdb, ch) in chain_res:
            continue
        prefix = pdb + ":" + ch + ":"
        covered = set()
        for pdbres in pdbmap:
            if pdbres.startswith(prefix):
                prores = pdbmap[pdbres]
                if prores.startswith(protein):  # same protein
                    covered.add(prores)
        chain_res[(pdb, ch)] = covered

    newlist = []
    covres = set()
    while remaining:
        best_gain = 0
        best_pdbch = None
        for pdb, ch in remaining:
            gain = len(chain_res[(pdb, ch)] - covres)
            # Strict comparison keeps the earliest chain on ties.
            if gain > best_gain:
                best_gain = gain
                best_pdbch = (pdb, ch)
        if best_pdbch is None:  # nothing left improves the coverage
            break
        remaining.remove(best_pdbch)
        newlist.append(best_pdbch)
        if len(newlist) == top:
            break
        covres |= chain_res[best_pdbch]
    return newlist
Exemplo n.º 2
0
def max_cover_pdb(protein, pdbch, top=-1):
    ''' Greedily reduce a list of PDB chains while keeping the coverage.

        protein: prefix identifying the protein; only mapped residues whose
                 string starts with this prefix count as coverage.
        pdbch:   iterable of (pdb, chain) tuples; it is not modified.
        top:     stop once this many chains have been selected; the default
                 -1 never equals the selection count, so there is no limit.

        Return a list of (pdb, chain) tuples in selection order: each round
        takes the chain that adds the most residues not covered yet, with
        ties going to the chain that sorts first. Chains adding nothing new
        are left out.
    '''
    candidates = sorted(pdbch)
    from map_pdb_res import pdblist_to_uniprot
    # The lookups below rely on pdbmap being indexable by keys shaped like
    # 'pdb:chain:...' and on its values being residue strings.
    pdbmap = pdblist_to_uniprot([pdb for pdb, ch in candidates])

    # Build the residue set of every chain a single time; the original
    # rescanned the complete map for each candidate in each round.
    residues_of = {}
    for pdb, ch in candidates:
        if (pdb, ch) not in residues_of:
            prefix = pdb + ':' + ch + ':'
            found = set()
            for pdbres in pdbmap:
                if not pdbres.startswith(prefix):
                    continue
                prores = pdbmap[pdbres]
                if prores.startswith(protein):  ## same protein
                    found.add(prores)
            residues_of[(pdb, ch)] = found

    newlist = []
    covres = set()
    while candidates:
        best_pdbch = None
        best_gain = 0
        for pdb, ch in candidates:
            gain = len(residues_of[(pdb, ch)] - covres)
            if gain > best_gain:  ## strict: first chain wins a tie
                best_pdbch = (pdb, ch)
                best_gain = gain
        if best_pdbch is None:  ## maximum coverage reached
            break
        candidates.remove(best_pdbch)
        newlist.append(best_pdbch)
        if len(newlist) == top:
            break
        covres |= residues_of[best_pdbch]
    return newlist
Exemplo n.º 3
0
def combine_pdb_residue(filename, listname=None, outname=None):
    ''' Combine the feature vectors from prepare_feature functions
        by taking the element-wise maximum over PDB residues mapped to the
        same residue in a protein pair.

        filename: tab-separated input; the first column is
                  'pair,index,pdb:chain:pos' and the remaining columns are
                  parsed as floats.
        listname: tab-separated file with exactly three columns per line;
                  the second column is used as a PDB id and the first as
                  the name it maps to. Defaults to filename with '.fea'
                  replaced by '.map'.
        outname:  output path, defaults to filename + '.max'.

        A NaN entry never overrides a real number: when one of the two
        values being merged is NaN the other one is kept.

        Output is a file with the same format (the index column is always
        written as 0, rows sorted by pair and residue); returns outname.
    '''
    import math

    if listname is None:
        listname = filename.replace('.fea', '.map')
    if outname is None:
        outname = filename + '.max'

    data = []
    pdblist = set()
    with open(filename, 'r') as infile:
        for line in infile:
            ele = line.split('\t')
            pp, _idx, res = ele[0].split(',')
            pdblist.add(res.split(':')[0])
            data.append((pp, res, [float(val) for val in ele[1:]]))

    from map_pdb_res import pdblist_to_uniprot
    res_map = pdblist_to_uniprot(pdblist)

    sup_map = {}  ## supplementary residue map from the input list
    with open(listname, 'r') as mapfile:
        for line in mapfile:
            p, s, _c = line.split('\t')
            sup_map[s] = p

    comb = {}
    for pp, res, vals in data:
        if res in res_map:
            res = res_map[res]
        # Residues still in 'pdb:chain:pos' form fall back to the list file.
        if res.count(':') == 2:
            pdb, _ch, pos = res.split(':')
            if pdb in sup_map:
                res = sup_map[pdb] + ':' + pos
        key = (pp, res)
        if key not in comb:
            comb[key] = vals
            continue
        merged = []
        for old, new in zip(comb[key], vals):
            # NaN never compares equal to anything (itself included), so it
            # has to be detected with math.isnan rather than ==.
            if math.isnan(old):
                merged.append(new)
            elif math.isnan(new):
                merged.append(old)
            else:
                merged.append(max(old, new))
        comb[key] = merged

    with open(outname, 'w') as outfile:
        for pp, res in sorted(comb):
            fields = ['%s,0,%s' % (pp, res)]
            fields.extend('%s' % val for val in comb[(pp, res)])
            outfile.write('\t'.join(fields) + '\n')
    return outname
Exemplo n.º 4
0
def map_pdb_residue(filename, listname, useidx=1):
    ''' Input file format:
            for each line:
                p1=p2,index,pdb:chain:pos   value others
        Output format:
            a sorted list of tuples:
                (p1=p2, residue, max(value))

        filename: tab-separated input file as described above.
        listname: tab-separated file with exactly three columns per line;
                  the second column is used as a PDB id and the first as
                  the name it maps to when the residue is not found in the
                  pdblist_to_uniprot mapping.
        useidx:   index of the tab-separated column used as the value.

        Lines where any value column starts with 'na' (case-insensitive,
        e.g. NA or nan) are skipped. The identifier column is not part of
        that check, so pair names beginning with 'NA' are kept.
    '''
    data = []
    pdblist = set()
    # `with` guarantees the file is closed even if a line fails to parse.
    with open(filename, 'r') as infile:
        for line in infile:
            ele = line.split('\t')
            g1g2, _pdbidx, pdbres = ele[0].split(',')
            # Only the value columns can hold a missing-value marker.
            if any(v.lower().startswith('na') for v in ele[1:]):
                continue
            value = ele[useidx].strip()
            data.append((g1g2, pdbres, float(value)))
            pdblist.add(pdbres.split(':')[0])

    from map_pdb_res import pdblist_to_uniprot
    res_map = pdblist_to_uniprot(pdblist)

    sup_map = {}  ## supplementary residue map from the input list
    with open(listname, 'r') as mapfile:
        for line in mapfile:
            p, s, _c = line.split('\t')
            sup_map[s] = p

    comb = {}
    for pp, res, val in data:
        if res in res_map:
            res = res_map[res]
        elif res.count(':') == 2:
            pdb, _ch, pos = res.split(':')
            if pdb in sup_map:
                res = sup_map[pdb] + ':' + pos
        key = (pp, res)
        if key in comb and comb[key] > val:
            continue  ## no need to update if having a larger value
        comb[key] = val

    return sorted((pp, res, val) for (pp, res), val in comb.items())