Exemplo n.º 1
0
 def _get_seq_motif(self, refseq_id, residue, pos_str):
     """Return the motif around a site, tolerating an off-by-one position.

     Parameters
     ----------
     refseq_id : hashable
         Key used to look the sequence up in ``self.seq_dict``.
     residue : str
         The residue expected at the given position.
     pos_str : str
         The 1-indexed position of the site; converted with ``int()``.

     Returns
     -------
     dict
         ``{'site_motif': {'motif': ..., 'respos': ..., 'off_by_one': bool}}``
         where ``motif`` and ``respos`` are the values returned by
         ``ProtMapper.motif_from_position_seq``. An empty dict is returned
         when the residue is found neither at the given position nor at the
         position immediately after it.

     Raises
     ------
     KeyError
         If ``refseq_id`` is not in ``self.seq_dict``.
     ValueError
         If ``pos_str`` cannot be converted to an integer.

     Notes
     -----
     Sites that do not match are appended to ``self.invalid_site_pos``;
     those that match one position later are also appended to
     ``self.off_by_one``. Positions falling outside the sequence are treated
     as non-matching instead of raising ``IndexError`` or wrapping around to
     the end of the sequence.
     """
     seq = self.seq_dict[refseq_id]
     pos_1ix = int(pos_str)
     pos_0ix = pos_1ix - 1

     def _residue_at(ix):
         # Explicit bounds check: a negative index would silently wrap to
         # the end of the sequence, and an index past the end would raise.
         return 0 <= ix < len(seq) and seq[ix] == residue

     if _residue_at(pos_0ix):
         motif, respos = ProtMapper.motif_from_position_seq(
             seq, pos_1ix, self.motif_window)
         return {
             'site_motif': {
                 'motif': motif,
                 'respos': respos,
                 'off_by_one': False
             }
         }

     self.invalid_site_pos.append((refseq_id, residue, pos_str))
     if not _residue_at(pos_0ix + 1):
         return {}

     # The residue sits one position later than reported.
     self.off_by_one.append((refseq_id, residue, pos_str))
     motif, respos = ProtMapper.motif_from_position_seq(
         seq, pos_1ix + 1, self.motif_window)
     return {
         'site_motif': {
             'motif': motif,
             'respos': respos,
             'off_by_one': True
         }
     }
Exemplo n.º 2
0
def count_kin_sub_mapped(human_peptides, site_dict):
    """Map peptides to the human reference and tally how many are annotated.

    Parameters
    ----------
    human_peptides : list
        List of ``(uniprot_id, peptide, site_position)`` tuples. NOTE: the
        list is shuffled in place, and the returned list follows the
        shuffled order.
    site_dict : dict
        Maps ``(up_id, residue, position)`` keys to a sized collection; its
        length is added to the running ``total_annot`` count.

    Returns
    -------
    list
        The mapped-site objects returned by
        ``ProtMapper.map_peptide_to_human_ref``, one per input peptide.

    Progress and running counts are printed on every iteration.
    """
    import random
    random.shuffle(human_peptides)

    # Build the mapper once; it does not depend on the peptide being mapped,
    # so constructing it on every iteration was repeated work.
    pm = ProtMapper()

    mapped_sites = []
    in_human = 0
    not_in_human = 0
    annot = 0
    total_annot = 0
    for ix, (hum_up_id, peptide, site_pos) in enumerate(human_peptides):
        print(ix)
        ms = pm.map_peptide_to_human_ref(hum_up_id, 'uniprot', peptide,
                                         site_pos)
        if ms.valid:
            in_human += 1
            # Check if the human site is annotated
            site_key = (ms.up_id, ms.mapped_res, ms.mapped_pos)
            if site_key in site_dict:
                annot += 1
                total_annot += len(site_dict[site_key])
        else:
            not_in_human += 1
        mapped_sites.append(ms)
        print("Not hum", not_in_human, "hum", in_human, "annot", annot,
              "total_annot", total_annot)
    return mapped_sites
Exemplo n.º 3
0
def map_bel_sites():
    """Map the sites of the BEL agents and pickle the result to BEL_SITES.

    Uses a cache-enabled ProtMapper backed by CACHE_PATH and saves the cache
    once the mapped sites have been written.
    """
    pm = ProtMapper(use_cache=True, cache_path=CACHE_PATH)
    # map_agents is handed the BEL_AGENTS path, exactly as before. The
    # up-front unpickling of that file was removed because its result was
    # never used.
    # NOTE(review): confirm map_agents loads the file itself.
    bel_sites = map_agents(BEL_AGENTS, pm, 'bel')
    with open(BEL_SITES, 'wb') as f:
        pickle.dump(bel_sites, f)
    pm.save_cache()
Exemplo n.º 4
0
def build_data(data_sites, dm_opt):
    """Return the site tuples, optionally remapped to the human reference.

    Each input site is a ``(refseq, gene, res, pos, pep, respos)`` tuple.
    When ``dm_opt`` is ``True`` and ``up_for_hgnc`` yields a UniProt ID for
    the gene, the peptide is mapped with ``ProtMapper`` and the output tuple
    carries the HGNC name plus the mapped residue/position if the mapping is
    valid and both are set. Otherwise the site is passed through unchanged.
    """
    mapper = ProtMapper()
    out = []
    for entry in data_sites:
        refseq, gene, res, pos, pep, respos = entry
        hgnc_name, up_id = up_for_hgnc(gene)

        # Nothing to remap: keep the original tuple as-is.
        if dm_opt is not True or up_id is None:
            out.append(entry)
            continue

        mapped = mapper.map_peptide_to_human_ref(up_id, 'uniprot', pep,
                                                 int(respos) + 1)
        if mapped.valid and mapped.mapped_res and mapped.mapped_pos:
            res, pos = mapped.mapped_res, mapped.mapped_pos
        out.append((refseq, hgnc_name, res, pos, pep, respos))
    return out
Exemplo n.º 5
0
def valid_counts(sitelist):
    """Build a DataFrame of mapping results for every site in ``sitelist``.

    Each site is a ``(refseq, gene, res, pos, pep, respos)`` tuple. For each
    one the UniProt IDs derived from the RefSeq ID and from the gene name
    are recorded, then the peptide is mapped against both (via
    ``map_peptide``) and the resulting entries are merged into the row.
    The running index is printed every 10000 sites.
    """
    mapper = ProtMapper()
    rows = []
    for idx, site in enumerate(sitelist):
        refseq, gene, res, pos, pep, respos = site
        if not idx % 10000:
            print(idx)

        up_rs = up_id_for_rs(refseq)
        hgnc_name, up_hgnc = up_for_hgnc(gene)

        # Key order is kept deliberately: it determines the column order.
        row = dict(refseq=refseq, gene=gene, res=res, pos=pos,
                   peptide=pep, respos=respos,
                   up_hgnc=up_hgnc, up_rs=up_rs)
        row['up_rs_iso_specific'] = (
            None if up_rs is None else iso_specific(up_rs))

        via_hgnc = map_peptide(up_hgnc, res, pos, pep, respos, 'hgnc',
                               mapper)
        via_rs = map_peptide(up_rs, res, pos, pep, respos, 'rs', mapper)
        row.update(via_hgnc)
        row.update(via_rs)
        rows.append(row)

    return pd.DataFrame.from_dict(rows, orient='columns')
Exemplo n.º 6
0
def map_sites(sites_dict):
    """Map every site key of ``sites_dict`` to the human reference.

    Keys are ``(up_id, res, pos)`` tuples. Returns a dict from each key to
    the result of ``ProtMapper.map_to_human_ref``; keys whose mapping raises
    are logged and left out of the result. Progress is printed every 1000
    sites.
    """
    mapper = ProtMapper()
    mapped_by_site = {}
    for count, key in enumerate(sites_dict.keys()):
        if not count % 1000:
            print('%d of %d' % (count, len(sites_dict)))
        up_id, res, pos = key
        try:
            mapped = mapper.map_to_human_ref(up_id, 'uniprot', res, pos)
        except Exception as e:
            # Best effort: record the failure and move on to the next site.
            logger.exception(e)
            logger.info("up_id: %s, res %s, pos %s" % (up_id, res, pos))
        else:
            mapped_by_site[key] = mapped
    return mapped_by_site
Exemplo n.º 7
0
def map_pc_sites():
    """Map sites for each Pathway Commons source and pickle them by database.

    Runs ``map_agents`` on every agent file listed below with a shared,
    cache-enabled ProtMapper, writes the ``{db_name: sites}`` dict to
    BIOPAX_SITES_BY_DB, and then saves the mapper cache.
    """
    biopax_dir = 'output/biopax/'
    agent_files = {
        'hprd': biopax_dir + 'PathwayCommons10.hprd.BIOPAX.pkl',
        'kegg': biopax_dir + 'PathwayCommons10.kegg.BIOPAX.pkl',
        'panther': biopax_dir + 'PathwayCommons10.panther.BIOPAX.pkl',
        'pid': biopax_dir + 'PathwayCommons10.pid.BIOPAX.pkl',
        'psp_pc': biopax_dir + 'PathwayCommons10.psp.BIOPAX.pkl',
        'pc_tsv': 'output/psp_kinase_substrate_tsv.pkl',
        'reactome': biopax_dir + 'PathwayCommons10.reactome.BIOPAX.pkl',
        'wp': biopax_dir + 'PathwayCommons10.wp.BIOPAX.pkl',
        'psp_biopax': biopax_dir + 'Kinase_substrates.pkl',
        # Disabled source:
        # 'reactome_human': 'output/biopax/Homo_sapiens.pkl',
    }
    mapper = ProtMapper(use_cache=True, cache_path=CACHE_PATH)
    sites_by_db = {
        db_name: map_agents(path, mapper, db_name)
        for db_name, path in agent_files.items()
    }
    with open(BIOPAX_SITES_BY_DB, 'wb') as out_file:
        pickle.dump(sites_by_db, out_file)
    mapper.save_cache()
Exemplo n.º 8
0
 def _get_seq_motif(self, refseq_id, residue, pos_str):
     """Return the motif around a site, tolerating an off-by-one position.

     Parameters
     ----------
     refseq_id : hashable
         Key used to look the sequence up in ``self.seq_dict``.
     residue : str
         The residue expected at the given position.
     pos_str : str
         The 1-indexed position of the site; converted with ``int()``.

     Returns
     -------
     dict
         ``{'site_motif': {'motif': ..., 'respos': ..., 'off_by_one': bool}}``
         where ``motif`` and ``respos`` are the values returned by
         ``ProtMapper.motif_from_position_seq``. An empty dict is returned
         when the residue is found neither at the given position nor at the
         position immediately after it.

     Raises
     ------
     KeyError
         If ``refseq_id`` is not in ``self.seq_dict``.
     ValueError
         If ``pos_str`` cannot be converted to an integer.

     Notes
     -----
     Sites that do not match are appended to ``self.invalid_site_pos``;
     those that match one position later are also appended to
     ``self.off_by_one``. Positions falling outside the sequence are treated
     as non-matching instead of raising ``IndexError`` or wrapping around to
     the end of the sequence.
     """
     seq = self.seq_dict[refseq_id]
     pos_1ix = int(pos_str)
     pos_0ix = pos_1ix - 1

     def _residue_at(ix):
         # Explicit bounds check: a negative index would silently wrap to
         # the end of the sequence, and an index past the end would raise.
         return 0 <= ix < len(seq) and seq[ix] == residue

     if _residue_at(pos_0ix):
         motif, respos = ProtMapper.motif_from_position_seq(
             seq, pos_1ix, self.motif_window)
         return {'site_motif': {'motif': motif, 'respos': respos,
                                'off_by_one': False}}

     self.invalid_site_pos.append((refseq_id, residue, pos_str))
     if not _residue_at(pos_0ix + 1):
         return {}

     # The residue sits one position later than reported.
     self.off_by_one.append((refseq_id, residue, pos_str))
     motif, respos = ProtMapper.motif_from_position_seq(
         seq, pos_1ix + 1, self.motif_window)
     return {'site_motif': {'motif': motif, 'respos': respos,
                            'off_by_one': True}}
Exemplo n.º 9
0
import json
from flask import Flask, request, abort, Response, jsonify
from flask_cors import CORS
from protmapper import ProtMapper

# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)
# Apply flask_cors to the whole application.
CORS(app)

# Optional boolean request arguments and the default used for each one when
# the request JSON does not provide it.
optional_bool_args = {
    'do_methionine_offset': True,
    'do_orthology_mapping': True,
    'do_isoform_mapping': True
}
# Module-level ProtMapper instance, created once when the module is imported.
pm = ProtMapper()


@app.route('/map_to_human_ref', methods=['GET', 'POST'])
def map_to_human_ref():
    required_args = ('prot_id', 'prot_ns', 'residue', 'position')

    # Require all required arguments
    for arg in required_args:
        if request.json.get(arg) is None:
            abort(Response('The required argument "%s" is missing.' % arg,
                           400))
    # Now set the required arguments and the optional ones with default values
    # as backup
    arg_values = {key: request.json.get(key) for key in required_args}
    for arg, default_value in optional_bool_args.items():
        value = request.json.get(arg, default_value)
        arg_values[arg] = value