Exemplo n.º 1
0
 def _build_protein_index(self):
     """Build, store and return the listing of proteins with identifications.

     Two grouped queries are run: one over the proteins whose
     ``hypothesis_id`` equals ``self.hypothesis_id`` (joined to
     ``Glycopeptide``), and one counting ``IdentifiedGlycopeptide`` rows of
     ``self.analysis_id`` whose ``ms2_score`` is greater than
     ``self.threshold``. The first result only supplies the name/id record
     for each protein; its count is not stored. Only proteins returned by
     the second query appear in the listing.

     Returns:
         list of dict: one entry per matched protein with the keys
         ``protein_name``, ``protein_id``,
         ``identified_glycopeptide_count`` and ``protein``, ordered by
         descending ``identified_glycopeptide_count``. The same list is
         assigned to ``self.protein_index``.

     Raises:
         KeyError: if the second query returns a protein id that the first
             query did not.
     """
     hypothesis_id = self.hypothesis_id

     # Every protein of the hypothesis that has at least one glycopeptide.
     theoretical_query = self.session.query(
         Protein.name, Protein.id, func.count(Glycopeptide.id))
     theoretical_query = theoretical_query.join(Glycopeptide)
     theoretical_query = theoretical_query.group_by(Protein.id)
     theoretical_query = theoretical_query.filter(
         Protein.hypothesis_id == hypothesis_id)
     theoretical_rows = theoretical_query.all()

     # Per-protein count of identifications passing the score threshold.
     matched_query = self.session.query(
         Protein.name, Protein.id, func.count(IdentifiedGlycopeptide.id))
     matched_query = matched_query.join(Protein.glycopeptides)
     matched_query = matched_query.join(
         IdentifiedGlycopeptide,
         IdentifiedGlycopeptide.structure_id == Glycopeptide.id)
     matched_query = matched_query.group_by(Protein.id)
     matched_query = matched_query.filter(
         IdentifiedGlycopeptide.ms2_score > self.threshold,
         IdentifiedGlycopeptide.analysis_id == self.analysis_id)
     matched_rows = matched_query.all()

     records_by_id = {
         row_id: {"protein_name": row_name, "protein_id": row_id}
         for row_name, row_id, _ in theoretical_rows
     }

     matched_records = []
     for _, row_id, n_identified in matched_rows:
         record = records_by_id[row_id]
         record['identified_glycopeptide_count'] = n_identified
         matched_records.append(record)

     def identified_count(record):
         return record["identified_glycopeptide_count"]

     # The list is local, so sorting in place is equivalent to sorted().
     matched_records.sort(key=identified_count, reverse=True)
     self.protein_index = matched_records

     # Attach the mapped Protein object to each entry, one lookup per entry.
     for record in self.protein_index:
         record['protein'] = self.session.query(Protein).get(
             record["protein_id"])
     return self.protein_index
Exemplo n.º 2
0
 def _build_protein_index(self):
     """Assemble ``self.protein_index`` from per-protein identification counts.

     A name/id record is created for every protein returned by the
     hypothesis-wide grouped query (the count from that query is not kept).
     Records are then annotated with the number of ``IdentifiedGlycopeptide``
     rows of ``self.analysis_id`` whose ``ms2_score`` exceeds
     ``self.threshold``; only annotated records are listed.

     Returns:
         list of dict: entries with the keys ``protein_name``,
         ``protein_id``, ``identified_glycopeptide_count`` and ``protein``,
         sorted by ``identified_glycopeptide_count`` in descending order.
         This is the same object stored on ``self.protein_index``.

     Raises:
         KeyError: if a protein id from the identification query has no
             record from the hypothesis-wide query.
     """
     hypothesis_id = self.hypothesis_id

     theoretical_rows = (
         self.session.query(
             Protein.name, Protein.id, func.count(Glycopeptide.id))
         .join(Glycopeptide)
         .group_by(Protein.id)
         .filter(Protein.hypothesis_id == hypothesis_id)
         .all()
     )
     identified_rows = (
         self.session.query(
             Protein.name, Protein.id, func.count(IdentifiedGlycopeptide.id))
         .join(Glycopeptide)
         .join(
             IdentifiedGlycopeptide,
             IdentifiedGlycopeptide.structure_id == Glycopeptide.id)
         .group_by(Protein.id)
         .filter(
             IdentifiedGlycopeptide.ms2_score > self.threshold,
             IdentifiedGlycopeptide.analysis_id == self.analysis_id)
         .all()
     )

     lookup = {}
     for name, pid, _ in theoretical_rows:
         lookup[pid] = {"protein_name": name, "protein_id": pid}

     ranked = []
     for _, pid, identified_total in identified_rows:
         found = lookup[pid]
         found.update(identified_glycopeptide_count=identified_total)
         ranked.append(found)

     # In-place sort of the local list; stable, like the sorted() builtin.
     ranked.sort(
         key=lambda item: item["identified_glycopeptide_count"],
         reverse=True)
     self.protein_index = ranked

     for item in self.protein_index:
         # One primary-key lookup per listed protein.
         item['protein'] = self.session.query(Protein).get(item["protein_id"])
     return self.protein_index
Exemplo n.º 3
0
 def __len__(self):
     """Return the recorded ``database_size``, or count rows when it is absent.

     The value stored under ``'database_size'`` in
     ``self.hypothesis.parameters`` is returned when that key exists.
     Otherwise a ``COUNT`` over ``self.identity_field`` is selected from
     ``self.selectable``, passed through ``self._limit_to_hypothesis`` and
     executed on ``self.session``.
     """
     try:
         recorded_size = self.hypothesis.parameters['database_size']
     except KeyError:
         # No recorded size: fall back to counting in the database.
         count_stmt = select([func.count(self.identity_field)])
         count_stmt = count_stmt.select_from(self.selectable)
         count_stmt = self._limit_to_hypothesis(count_stmt)
         return self.session.execute(count_stmt).scalar()
     else:
         return recorded_size
 def __len__(self):
     """Return the size of the collection.

     Uses ``self.hypothesis.parameters['database_size']`` when the key is
     present. On ``KeyError`` the size is obtained by executing a count of
     ``self.identity_field`` over ``self.selectable``, after the statement
     has been passed through ``self._limit_to_hypothesis``.
     """
     try:
         size = self.hypothesis.parameters['database_size']
     except KeyError:
         column_count = func.count(self.identity_field)
         counting = select([column_count]).select_from(self.selectable)
         counting = self._limit_to_hypothesis(counting)
         result = self.session.execute(counting)
         size = result.scalar()
     return size
Exemplo n.º 5
0
 def _count_produced_glycopeptides(self):
     """Count the hypothesis' glycopeptides, log the total and record it.

     The count covers ``Glycopeptide`` rows whose ``hypothesis_id`` equals
     ``self.hypothesis_id``. It is logged via ``self.log`` and passed to
     ``self.set_parameters`` under the key ``"database_size"``.

     Returns:
         The scalar result of the count query.
     """
     counting_query = self.query(func.count(Glycopeptide.id))
     counting_query = counting_query.filter(
         Glycopeptide.hypothesis_id == self.hypothesis_id)
     n_glycopeptides = counting_query.scalar()

     self.log("Generated %d glycopeptides" % n_glycopeptides)

     size_parameters = {"database_size": n_glycopeptides}
     self.set_parameters(size_parameters)
     return n_glycopeptides
Exemplo n.º 6
0
def summarize_glycopeptide_hypothesis(database_connection, hypothesis_identifier):
    """Echo the glycopeptide count of each protein in a hypothesis, then the total.

    Args:
        database_connection: object exposing a ``session`` attribute used to
            run the queries.
        hypothesis_identifier: value forwarded to ``get_by_name_or_id`` to
            look up the ``GlycopeptideHypothesis``.

    Output is written with ``click.echo``: one ``"<name>: <count>"`` line per
    protein, largest count first, followed by a ``"Total: <sum>"`` line.
    """
    session = database_connection.session
    hypothesis = get_by_name_or_id(session, GlycopeptideHypothesis, hypothesis_identifier)

    per_protein_query = session.query(Protein, func.count(Glycopeptide.id))
    per_protein_query = per_protein_query.join(Glycopeptide)
    per_protein_query = per_protein_query.group_by(Protein.id)
    per_protein_query = per_protein_query.filter(
        Protein.hypothesis_id == hypothesis.id)

    # Largest count first; ties keep the order the query returned them in.
    ranked_rows = sorted(
        per_protein_query.all(), key=lambda row: row[1], reverse=True)

    grand_total = 0
    for protein, n_glycopeptides in ranked_rows:
        line = "%s: %d" % (protein.name, n_glycopeptides)
        click.echo(line)
        grand_total += n_glycopeptides
    click.echo("Total: %d" % (grand_total,))
 def digest_proteins(self):
     """Run the protein digestion task and log how many peptides now exist.

     A ``ProteinDigestor`` is configured from this object's protease,
     modification lists and missed-cleavage limit, then handed to a
     ``MultipleProcessProteinDigestor`` together with ``self.protein_ids()``
     and ``self.n_processes``. After the task has run, the number of
     ``Peptide`` rows whose ``hypothesis_id`` equals ``self.hypothesis_id``
     is logged.
     """
     digestor = ProteinDigestor(
         self.protease,
         self.constant_modifications,
         self.variable_modifications,
         self.max_missed_cleavages)

     digestion_task = MultipleProcessProteinDigestor(
         self._original_connection,
         self.hypothesis_id,
         self.protein_ids(),
         digestor,
         n_processes=self.n_processes)
     digestion_task.run()

     peptide_count_query = self.query(func.count(Peptide.id))
     peptide_count_query = peptide_count_query.filter(
         Peptide.hypothesis_id == self.hypothesis_id)
     peptide_total = peptide_count_query.scalar()
     self.log("%d Base Peptides Produced" % (peptide_total,))
 def digest_proteins(self):
     """Digest the hypothesis' proteins and log the resulting peptide count.

     Builds a ``ProteinDigestor`` (including the ``semispecific`` setting),
     runs a ``MultipleProcessProteinDigestor`` over ``self.protein_ids()``
     with ``self.n_processes``, and then logs the number of ``Peptide`` rows
     whose ``hypothesis_id`` equals ``self.hypothesis_id``.
     """
     digestor = ProteinDigestor(
         self.protease,
         self.constant_modifications,
         self.variable_modifications,
         self.max_missed_cleavages,
         semispecific=self.semispecific)

     runner = MultipleProcessProteinDigestor(
         self._original_connection, self.hypothesis_id, self.protein_ids(),
         digestor, n_processes=self.n_processes)
     runner.run()

     # Count what the task produced for this hypothesis.
     counting_query = self.query(func.count(Peptide.id))
     counting_query = counting_query.filter(
         Peptide.hypothesis_id == self.hypothesis_id)
     produced = counting_query.scalar()
     self.log("%d Base Peptides Produced" % (produced,))
Exemplo n.º 9
0
def summarize_glycopeptide_hypothesis(database_connection,
                                      hypothesis_identifier):
    """Print per-protein glycopeptide counts for a hypothesis and their sum.

    Args:
        database_connection: provides the ``session`` the queries run on.
        hypothesis_identifier: passed to ``get_by_name_or_id`` to resolve
            the ``GlycopeptideHypothesis``.

    Each protein is echoed as ``"<name>: <count>"`` in descending count
    order, followed by a final ``"Total: <sum>"`` line.
    """
    session = database_connection.session
    hypothesis = get_by_name_or_id(
        session, GlycopeptideHypothesis, hypothesis_identifier)

    rows = (
        session.query(Protein, func.count(Glycopeptide.id))
        .join(Glycopeptide)
        .group_by(Protein.id)
        .filter(Protein.hypothesis_id == hypothesis.id)
        .all()
    )

    def glycopeptide_count(row):
        return row[1]

    rows = sorted(rows, key=glycopeptide_count, reverse=True)

    running_total = 0
    for protein, n_for_protein in rows:
        click.echo("%s: %d" % (protein.name, n_for_protein))
        running_total = running_total + n_for_protein
    click.echo("Total: %d" % (running_total, ))
Exemplo n.º 10
0
def protein_index(session, hypothesis_id):
    """Return name-sorted entries describing each protein of a hypothesis.

    Args:
        session: session object used to run the queries.
        hypothesis_id: value matched against ``Protein.hypothesis_id``.

    Returns:
        list of dict: one entry per row of the grouped query, with the keys
        ``protein_name``, ``protein_id``, ``theoretical_count`` (the
        ``Glycopeptide`` count for that protein) and ``protein`` (the result
        of ``session.query(Protein).get(protein_id)``), sorted by
        ``protein_name``.
    """
    count_query = session.query(
        Protein.name, Protein.id, func.count(Glycopeptide.id))
    count_query = count_query.join(Glycopeptide)
    count_query = count_query.group_by(Protein.id)
    count_query = count_query.filter(Protein.hypothesis_id == hypothesis_id)

    entries = [
        {
            "protein_name": row_name,
            "protein_id": row_id,
            "theoretical_count": row_count,
        }
        for row_name, row_id, row_count in count_query.all()
    ]
    # The list is local, so an in-place stable sort matches sorted().
    entries.sort(key=lambda item: item["protein_name"])

    for record in entries:
        record['protein'] = session.query(Protein).get(record["protein_id"])
    return entries
Exemplo n.º 11
0
 def _count_produced_glycopeptides(self):
     """Log how many glycopeptides belong to this hypothesis.

     Counts ``Glycopeptide`` rows whose ``hypothesis_id`` equals
     ``self.hypothesis_id`` and reports the number through ``self.log``.
     Nothing is returned.
     """
     counting_query = self.query(func.count(Glycopeptide.id))
     counting_query = counting_query.filter(
         Glycopeptide.hypothesis_id == self.hypothesis_id)
     n_glycopeptides = counting_query.scalar()
     self.log("Generated %d glycopeptides" % n_glycopeptides)
 def __len__(self):
     """Return the row count obtained from the database.

     A ``COUNT`` over ``self.identity_field`` is selected from
     ``self.selectable``, passed through ``self._limit_to_hypothesis`` and
     executed on ``self.session``; the scalar result is returned.
     """
     column_count = func.count(self.identity_field)
     count_stmt = select([column_count]).select_from(self.selectable)
     count_stmt = self._limit_to_hypothesis(count_stmt)
     result = self.session.execute(count_stmt)
     return result.scalar()
Exemplo n.º 13
0
 def _count_produced_glycopeptides(self):
     """Count, log and record the number of glycopeptides in the hypothesis.

     The count of ``Glycopeptide`` rows whose ``hypothesis_id`` equals
     ``self.hypothesis_id`` is logged with ``self.log`` and then passed to
     ``self.set_parameters`` under the key ``"database_size"``.

     Returns:
         The scalar result of the count query.
     """
     belongs_to_hypothesis = Glycopeptide.hypothesis_id == self.hypothesis_id
     total = (
         self.query(func.count(Glycopeptide.id))
         .filter(belongs_to_hypothesis)
         .scalar()
     )
     self.log("Generated %d glycopeptides" % total)
     self.set_parameters({"database_size": total})
     return total