Exemplo n.º 1
0
def site_specific_network_of_kinases_and_targets(f):
    """Write a tab-separated kinase -> target site table to `f`.

    A header row is written first, followed by one row for every
    (protein, site, kinase) combination reachable from `Protein.query`
    through `protein.sites` and `site.kinases`.

    Args:
        f: writable text file-like object; only its `write` method is used.
    """
    columns = (
        'kinase symbol',
        'target symbol',
        'kinase refseq',
        'target refseq',
        'target sequence position',
        'target amino acid',
    )
    f.write('\t'.join(columns) + '\n')

    proteins = tqdm(Protein.query, total=fast_count(Protein.query))

    for protein in proteins:
        for site in protein.sites:
            for kinase in site.kinases:
                row = (
                    kinase.name,
                    protein.gene.name,
                    # a kinase without a linked protein gets an empty refseq
                    kinase.protein.refseq if kinase.protein else '',
                    protein.refseq,
                    site.position,
                    site.residue,
                )
                f.write('\t'.join(str(cell) for cell in row) + '\n')
Exemplo n.º 2
0
def mutations_affecting_ptm_sites(f, sources):
    """Write a tab-separated table of PTM-affecting mutations to `f`.

    A header row is written first. Then, for every mutation-details object
    of every model in `sources` whose mutation reports `is_ptm()`, one row
    is written per site returned by `get_affected_ptm_sites()`.

    Args:
        f: writable text file-like object; only its `write` method is used.
        sources: iterable of mutation-details models; each must expose a
            `query` attribute accepted by `yield_objects` and `fast_count`.
    """
    header = [
        'gene',
        'refseq',
        'mutation position',
        'mutation alt',
        'mutation summary',
        'site position',
        'site residue'
    ]
    f.write('\t'.join(header) + '\n')

    for mutation_details_model in sources:
        all_details = tqdm(
            yield_objects(mutation_details_model.query),
            total=fast_count(mutation_details_model.query)
        )

        for mut_details in all_details:
            mutation = mut_details.mutation
            if not mutation.is_ptm():
                continue

            # The first five columns are identical for every site of a given
            # mutation. They are built lazily, on the first affected site, so
            # that summary() runs at most once per mutation and not at all
            # for mutations without affected sites.
            shared_columns = None

            for site in mutation.get_affected_ptm_sites():
                if shared_columns is None:
                    protein = mutation.protein
                    summary = mut_details.summary()
                    # isinstance (rather than an exact type check) also joins
                    # list subclasses instead of writing their Python repr.
                    if isinstance(summary, list):
                        summary = ', '.join(map(str, summary))
                    shared_columns = '\t'.join(map(str, [
                        protein.gene.name,
                        protein.refseq,
                        mutation.position,
                        mutation.alt,
                        summary,
                    ]))

                row = [shared_columns, str(site.position), str(site.residue)]
                f.write('\t'.join(row) + '\n')
Exemplo n.º 3
0
    def export(self, path=None, only_primary_isoforms=False):
        """Export all mutations from this source in ActiveDriver compatible format.

        Source specific data export can be implemented with export_details method,
        while export_details_headers should provide names for respective headers.

        The output is a gzip-compressed, tab-separated text file: a header
        row followed by one row per item returned by `export_details` for
        each exported mutation. Rows are separated by newlines and no
        trailing newline is written.

        Args:
            path: destination file; when falsy, the path is obtained from
                `self.generate_export_path(only_primary_isoforms)`.
            only_primary_isoforms: when true, mutations whose protein is not
                flagged `is_preferred_isoform` are skipped.
        """
        import gc
        from tqdm import tqdm

        if not path:
            path = self.generate_export_path(only_primary_isoforms)

        header = [
            'gene', 'isoform', 'position',  'wt_residue', 'mut_residue'
        ] + self.export_details_headers()

        tick = 0

        with gzip.open(path, 'wt') as f:
            f.write('\t'.join(header))

            mutations = tqdm(
                yield_objects(self.model.query),
                total=fast_count(db.session.query(self.model))
            )

            for mutation in mutations:
                tick += 1

                m = mutation.mutation

                # No `continue` here: the periodic garbage collection below
                # must also run when the current row is filtered out.
                if not only_primary_isoforms or m.protein.is_preferred_isoform:

                    dataset_specific = self.export_details(mutation)

                    try:
                        ref = m.ref
                    except IndexError:
                        # the reference residue could not be looked up;
                        # report it and export an empty wt_residue instead
                        print(
                            'Mutation: %s %s %s is exceeding the proteins sequence'
                            % (m.protein.refseq, m.position, m.alt)
                        )
                        ref = ''

                    for instance in dataset_specific:
                        data = [
                            m.protein.gene.name, m.protein.refseq,
                            str(m.position), ref, m.alt
                        ] + instance

                        f.write('\n' + '\t'.join(data))

                # Drop the loop's reference before collecting so the object
                # just processed does not stay reachable via this local.
                del mutation
                if tick % 10000 == 0:
                    gc.collect()
Exemplo n.º 4
0
 def interactions(self):
     """Return the number of site-kinase plus site-kinase-group links.

     The result is the sum of two `fast_count` calls: `Site` joined with
     `Kinase` over `Site.kinases`, and `Site` joined with `KinaseGroup`
     over `Site.kinase_groups`.
     """
     with_kinases = fast_count(
         db.session.query(models.Site).join(
             models.Kinase, models.Site.kinases
         )
     )
     with_kinase_groups = fast_count(
         db.session.query(models.Site).join(
             models.KinaseGroup, models.Site.kinase_groups
         )
     )
     return with_kinases + with_kinase_groups
Exemplo n.º 5
0
 def kinase_groups_covered(self):
     """Return `fast_count` of kinase groups matching `KinaseGroup.sites.any()`.

     NOTE(review): presumably this means groups with at least one site -
     confirm against the ORM relationship definition.
     """
     all_groups = db.session.query(models.KinaseGroup)
     groups_with_sites = all_groups.filter(models.KinaseGroup.sites.any())
     return fast_count(groups_with_sites)
Exemplo n.º 6
0
 def confirmed_mutations_count(self):
     """Return `fast_count` of `self.confirmed_mutations`."""
     confirmed = self.confirmed_mutations
     return fast_count(confirmed)
Exemplo n.º 7
0
 def count(self):
     """Return `fast_count` of `self.query`."""
     query = self.query
     return fast_count(query)
Exemplo n.º 8
0
    def export(self, path=None, only_primary_isoforms=False):
        """Export all mutations from this source in ActiveDriver compatible format.

        Source specific data export can be implemented with export_details method,
        while export_details_headers should provide names for respective headers.

        The output is a gzip-compressed, tab-separated text file: a header
        row followed by one row per item returned by `export_details` for
        each exported mutation. Rows are separated by newlines and no
        trailing newline is written.

        Args:
            path: destination file; when falsy, a timestamped file name is
                generated under `exported/mutations` (the directory is
                created if missing).
            only_primary_isoforms: when true, mutations whose protein is not
                flagged `is_preferred_isoform` are skipped.
        """
        from datetime import datetime
        import gc
        import os
        from tqdm import tqdm
        export_time = datetime.utcnow()

        if not path:
            directory = os.path.join('exported', 'mutations')
            os.makedirs(directory, exist_ok=True)

            name_template = '{model_name}{restrictions}-{date}.tsv.gz'

            # NOTE(review): `date` is rendered with the default str() form
            # of datetime ('YYYY-MM-DD HH:MM:SS.ffffff'), so the file name
            # contains a space and colons - confirm this is acceptable on
            # every target file system.
            name = name_template.format(
                model_name=self.model_name,
                restrictions=('-primary_isoforms_only'
                              if only_primary_isoforms else ''),
                date=export_time)
            path = os.path.join(directory, name)

        header = ['gene', 'isoform', 'position', 'wt_residue', 'mut_residue'
                  ] + self.export_details_headers()

        tick = 0

        with gzip.open(path, 'wt') as f:
            f.write('\t'.join(header))

            mutations = tqdm(yield_objects(self.model.query),
                             total=fast_count(db.session.query(self.model)))

            for mutation in mutations:
                tick += 1

                m = mutation.mutation

                # No `continue` here: the periodic garbage collection below
                # must also run when the current row is filtered out.
                if not only_primary_isoforms or m.protein.is_preferred_isoform:

                    dataset_specific = self.export_details(mutation)

                    try:
                        ref = m.ref
                    except IndexError:
                        # the reference residue could not be looked up;
                        # report it and export an empty wt_residue instead
                        print(
                            'Mutation: %s %s %s is exceeding the proteins sequence'
                            % (m.protein.refseq, m.position, m.alt))
                        ref = ''

                    for instance in dataset_specific:
                        data = [
                            m.protein.gene.name, m.protein.refseq,
                            str(m.position), ref, m.alt
                        ] + instance

                        f.write('\n' + '\t'.join(data))

                # Drop the loop's reference before collecting so the object
                # just processed does not stay reachable via this local.
                del mutation
                if tick % 10000 == 0:
                    gc.collect()