Exemplo n.º 1
0
    def test_shortestPath_2(self):
        """Check the integration borders computed for run ``0_1``.

        The peakgroups with cluster id 1 from ``self.current_mpep1`` are passed
        to ``integrationBorderShortestPath`` together with the minimum spanning
        tree expressed as pairs of run ids. The resulting borders are compared
        with the ``0_2`` -> ``0_1`` transformation applied to 240.0 and 260.0
        and with fixed reference values.
        """

        rid = "0_1"
        spl_aligner = SplineAligner(self.initial_alignment_cutoff)
        tree = MinimumSpanningTree(getDistanceMatrix(self.new_exp, self.multipeptides, spl_aligner))
        # Translate the tree edges from run indices to run ids
        tree_mapped = [(self.new_exp.runs[a].get_id(), self.new_exp.runs[b].get_id()) for a, b in tree]

        # Select peakgroups, compute left/right border
        selected_pg = [pg for p in self.current_mpep1.getAllPeptides() for pg in p.get_all_peakgroups() if pg.get_cluster_id() == 1]
        border_l, border_r = integrationBorderShortestPath(selected_pg,
            rid, self.tr_data, tree_mapped)

        # Shortest path means that we transformed from 0_2 to 0_1.
        # predict() is called with a one-element list, so take element [0] of
        # its result: the borders are scalars and must be compared with a
        # scalar, not with the whole returned sequence.
        self.assertAlmostEqual(border_l, self.tr_data.getTrafo("0_2", "0_1").predict([ 240.0 ])[0])
        self.assertAlmostEqual(border_r, self.tr_data.getTrafo("0_2", "0_1").predict([ 260.0 ])[0])

        # Fixed reference values for the transformed borders
        self.assertAlmostEqual(border_l, 168.03088803088787)
        self.assertAlmostEqual(border_r, 183.32046332046318)
Exemplo n.º 2
0
    def test_shortestPath_1(self):
        """Check the integration borders computed for run ``0_0``.

        The cluster-1 peakgroups of ``self.current_mpep1`` are handed to
        ``integrationBorderShortestPath`` along with the minimum spanning tree
        given as run-id pairs. The borders must match the direct ``0_2`` ->
        ``0_0`` transformation of 240.0 and 260.0 as well as fixed reference
        values.
        """
        target_run_id = "0_0"

        # Build the spanning tree over all runs and express its edges as run ids
        aligner = SplineAligner(self.initial_alignment_cutoff)
        distances = getDistanceMatrix(self.new_exp, self.multipeptides, aligner)
        index_tree = MinimumSpanningTree(distances)
        id_tree = []
        for first, second in index_tree:
            id_tree.append((self.new_exp.runs[first].get_id(),
                            self.new_exp.runs[second].get_id()))

        # Collect every peakgroup that belongs to cluster 1
        cluster_peakgroups = []
        for peptide in self.current_mpep1.getAllPeptides():
            for peakgroup in peptide.get_all_peakgroups():
                if peakgroup.get_cluster_id() == 1:
                    cluster_peakgroups.append(peakgroup)

        left, right = integrationBorderShortestPath(
            cluster_peakgroups, target_run_id, self.tr_data, id_tree)

        # Direct transformation from 0_2 to 0_0
        direct_trafo = self.tr_data.getTrafo("0_2", "0_0")
        expected_left = direct_trafo.predict([ 240.0 ])[0]
        self.assertAlmostEqual(left, expected_left)
        expected_right = direct_trafo.predict([ 260.0 ])[0]
        self.assertAlmostEqual(right, expected_right)

        # Fixed reference values for the transformed borders
        self.assertAlmostEqual(left, 77.992277992277934)
        self.assertAlmostEqual(right, 84.1698841699)
def analyze_multipeptide_cluster(current_mpep, cnt, new_exp, swath_chromatograms,
                          transformation_collection_, border_option, selected_pg, cluster_id,
                          onlyExtractFromRun=None, tree=None, mat=None, is_test=False):
    """Impute the peakgroup of one cluster in every run that lacks it.

    For each run in ``new_exp.runs`` that is not represented among
    ``selected_pg``, integration borders are determined, the chromatogram is
    integrated via ``integrate_chromatogram`` and, unless that call returns
    ``"NA"``, the new peakgroup is stored in ``current_mpep``.

    The borders are taken from the first applicable source:

    1. a peakgroup with the same ``cluster_id`` that already exists in the
       precursor group of that run (the one with the lowest FDR score),
    2. ``integrationBorderShortestPath`` if ``tree`` is given,
    3. ``integrationBorderShortestDistance`` if ``mat`` is given,
    4. ``integrationBorderReference`` using ``border_option`` otherwise.

    Parameters:
        current_mpep: container queried with ``hasPrecursorGroup`` /
            ``getPrecursorGroup`` and extended with ``insert``; modified in place.
        cnt: counter object; its ``peakgroups``, ``imputations`` and
            ``imputation_succ`` attributes are incremented.
        new_exp: experiment object providing the list ``runs``.
        swath_chromatograms: passed through to ``integrate_chromatogram``.
        transformation_collection_: passed through to the border functions.
        border_option: passed to ``integrationBorderReference``.
        selected_pg: non-empty sequence of already selected peakgroups; the
            first one serves as template for integration and for the sequence
            and protein name of newly created precursors.
        cluster_id: cluster id assigned to every imputed peakgroup.
        onlyExtractFromRun: if not None, imputation is restricted to the run
            with this id.
        tree: optional tree passed to ``integrationBorderShortestPath``.
        mat: optional matrix passed to ``integrationBorderShortestDistance``.
        is_test: passed through to ``integrate_chromatogram``.

    Returns:
        None. Results are stored in ``current_mpep`` and ``cnt``.
    """

    # The run id -> run index map only depends on new_exp.runs, so it is built
    # once instead of once per imputed run.
    rmap = dict((r.get_id(), i) for i, r in enumerate(new_exp.runs))

    for rid in [r.get_id() for r in new_exp.runs]:
        if VERBOSE:
            print("Shall work on run  %s" % (rid,))
        cnt.peakgroups += 1

        # Check whether we have a peakgroup for this run id; if yes, it is
        # already selected for output and nothing needs to be imputed.
        if any(pg.peptide.run.get_id() == rid for pg in selected_pg):
            continue

        # Skip if we should not extract from this run
        if onlyExtractFromRun is not None and onlyExtractFromRun != rid:
            continue

        cnt.imputations += 1

        # Select current run, compute right/left integration border and then integrate
        current_run = [r for r in new_exp.runs if r.get_id() == rid][0]

        if VERBOSE:
            print("Will try to fill NA in run %s for peptide %s" % (
                current_run.get_id(), selected_pg[0].peptide.get_id()))
            print("%s %s" % (tree, mat))

        inferred_from_isotope = False
        if current_mpep.hasPrecursorGroup(current_run.get_id()):
            # If another precursor was already picked for this run,
            # check if any peakgroups of the same cluster exist and if
            # so, try to use the one with the minimal FDR score to
            # infer the boundaries.
            prgr = current_mpep.getPrecursorGroup(current_run.get_id())
            candidates = [pg_ for prec_ in prgr for pg_ in prec_.peakgroups
                          if pg_.get_cluster_id() == cluster_id]
            if candidates:
                # TODO if the boundaries were inferred (e.g. fdr score > 1.0), should we not use them?
                # Compare on the score only: on equal scores the first
                # candidate wins and the peakgroup objects themselves are
                # never compared with each other.
                best_pg = min(candidates, key=lambda pg_: pg_.get_fdr_score())
                border_l = float(best_pg.get_value("leftWidth"))
                border_r = float(best_pg.get_value("rightWidth"))
                inferred_from_isotope = True

                if VERBOSE:
                    print("Will try to infer from isotopically modified version of the same run!")
                    print("Precursor is %s" % (prgr,))
                    for prec in prgr:
                        print(" -- %s" % (prec,))
                        for pg in prec.peakgroups:
                            print("  *  %s" % (pg,))
                    print("Min fdr pg %s  border %s / %s" % (best_pg, border_l, border_r))

        if inferred_from_isotope:
            # All good, borders were taken from the existing peakgroup
            pass
        elif tree is not None:
            ## Use the closest path approach
            border_l, border_r = integrationBorderShortestPath(
                selected_pg, rid, transformation_collection_, tree)
        elif mat is not None:
            ## Use the closest overall run approach
            border_l, border_r = integrationBorderShortestDistance(
                selected_pg, rid, transformation_collection_, mat, rmap)
        else:
            ## Use the reference-based approach
            border_l, border_r = integrationBorderReference(
                new_exp, selected_pg, rid, transformation_collection_, border_option)

        newpg = integrate_chromatogram(selected_pg[0], current_run, swath_chromatograms,
                                       border_l, border_r, cnt, is_test)

        # integrate_chromatogram signals failure with the string "NA"
        if newpg != "NA":
            if VERBOSE:
                print("Managed to fill NA in run %s with value %s / borders %s %s" % (
                    current_run.get_id(), newpg.get_value("Intensity"), border_l, border_r))
            cnt.imputation_succ += 1
            transition_group_id = newpg.get_value("transition_group_id")
            try:
                peptide_label = newpg.get_value("peptide_group_label")
            except KeyError:
                # No explicit group label: fall back to the transition group id
                peptide_label = transition_group_id

            # Create new precursor
            precursor = GeneralPrecursor(transition_group_id, current_run)
            newpg.setClusterID(cluster_id)
            newpg.peptide = precursor
            precursor.add_peakgroup(newpg)
            precursor.sequence = selected_pg[0].peptide.sequence
            precursor.protein_name = selected_pg[0].peptide.protein_name

            if current_mpep.hasPrecursorGroup(current_run.get_id()):
                prec_group = current_mpep.getPrecursorGroup(current_run.get_id())
                if prec_group.getPrecursor(transition_group_id) is None:
                    # No precursors exists yet for this transition_group_id - this
                    # means that we have a new run for which a precursor group already
                    # exists (e.g. we have found a light version already) and are now
                    # dealing with the heavy ...
                    prec_group.addPrecursor(precursor)
                else:
                    # Likely, another peakgroup from the same run and peptide was
                    # picked already but not from the same cluster
                    prec_group.getPrecursor(transition_group_id).add_peakgroup(newpg)
            else:
                # Create new precursor group and insert
                precursor_group = PrecursorGroup(peptide_label, current_run)
                precursor_group.addPrecursor(precursor)
                current_mpep.insert(rid, precursor_group)
def analyze_multipeptide_cluster(current_mpep, cnt, new_exp, swath_chromatograms,
                          transformation_collection_, border_option, selected_pg, cluster_id,
                          onlyExtractFromRun=None, tree=None, mat=None, is_test=False):
    """Impute the peakgroup of one cluster in every run that lacks it.

    For each run in ``new_exp.runs`` that is not represented among
    ``selected_pg``, integration borders are determined, the chromatogram is
    integrated via ``integrate_chromatogram`` and, unless that call returns
    ``"NA"``, the new peakgroup is stored in ``current_mpep``.

    The borders are taken from the first applicable source:

    1. a peakgroup with the same ``cluster_id`` that already exists in the
       precursor group of that run (the one with the lowest FDR score),
    2. ``integrationBorderShortestPath`` if ``tree`` is given,
    3. ``integrationBorderShortestDistance`` if ``mat`` is given,
    4. ``integrationBorderReference`` using ``border_option`` otherwise.

    Parameters:
        current_mpep: container queried with ``hasPrecursorGroup`` /
            ``getPrecursorGroup`` and extended with ``insert``; modified in place.
        cnt: counter object; its ``peakgroups``, ``imputations`` and
            ``imputation_succ`` attributes are incremented.
        new_exp: experiment object providing the list ``runs``.
        swath_chromatograms: passed through to ``integrate_chromatogram``.
        transformation_collection_: passed through to the border functions.
        border_option: passed to ``integrationBorderReference``.
        selected_pg: non-empty sequence of already selected peakgroups; the
            first one serves as template for integration and for the sequence
            and protein name of newly created precursors.
        cluster_id: cluster id assigned to every imputed peakgroup.
        onlyExtractFromRun: if not None, imputation is restricted to the run
            with this id.
        tree: optional tree passed to ``integrationBorderShortestPath``.
        mat: optional matrix passed to ``integrationBorderShortestDistance``.
        is_test: passed through to ``integrate_chromatogram``.

    Returns:
        None. Results are stored in ``current_mpep`` and ``cnt``.
    """

    # The run id -> run index map only depends on new_exp.runs, so it is built
    # once instead of once per imputed run.
    rmap = dict((r.get_id(), i) for i, r in enumerate(new_exp.runs))

    for rid in [r.get_id() for r in new_exp.runs]:
        if VERBOSE:
            print("Shall work on run  %s" % (rid,))
        cnt.peakgroups += 1

        # Check whether we have a peakgroup for this run id; if yes, it is
        # already selected for output and nothing needs to be imputed.
        if any(pg.peptide.run.get_id() == rid for pg in selected_pg):
            continue

        # Skip if we should not extract from this run
        if onlyExtractFromRun is not None and onlyExtractFromRun != rid:
            continue

        cnt.imputations += 1

        # Select current run, compute right/left integration border and then integrate
        current_run = [r for r in new_exp.runs if r.get_id() == rid][0]

        if VERBOSE:
            print("Will try to fill NA in run %s for peptide %s" % (
                current_run.get_id(), selected_pg[0].peptide.get_id()))
            print("%s %s" % (tree, mat))

        inferred_from_isotope = False
        if current_mpep.hasPrecursorGroup(current_run.get_id()):
            # If another precursor was already picked for this run,
            # check if any peakgroups of the same cluster exist and if
            # so, try to use the one with the minimal FDR score to
            # infer the boundaries.
            prgr = current_mpep.getPrecursorGroup(current_run.get_id())
            candidates = [pg_ for prec_ in prgr for pg_ in prec_.peakgroups
                          if pg_.get_cluster_id() == cluster_id]
            if candidates:
                # TODO if the boundaries were inferred (e.g. fdr score > 1.0), should we not use them?
                # Compare on the score only: on equal scores the first
                # candidate wins and the peakgroup objects themselves are
                # never compared with each other.
                best_pg = min(candidates, key=lambda pg_: pg_.get_fdr_score())
                border_l = float(best_pg.get_value("leftWidth"))
                border_r = float(best_pg.get_value("rightWidth"))
                inferred_from_isotope = True

                if VERBOSE:
                    print("Will try to infer from isotopically modified version of the same run!")
                    print("Precursor is %s" % (prgr,))
                    for prec in prgr:
                        print(" -- %s" % (prec,))
                        for pg in prec.peakgroups:
                            print("  *  %s" % (pg,))
                    print("Min fdr pg %s  border %s / %s" % (best_pg, border_l, border_r))

        if inferred_from_isotope:
            # All good, borders were taken from the existing peakgroup
            pass
        elif tree is not None:
            ## Use the closest path approach
            border_l, border_r = integrationBorderShortestPath(
                selected_pg, rid, transformation_collection_, tree)
        elif mat is not None:
            ## Use the closest overall run approach
            border_l, border_r = integrationBorderShortestDistance(
                selected_pg, rid, transformation_collection_, mat, rmap)
        else:
            ## Use the reference-based approach
            border_l, border_r = integrationBorderReference(
                new_exp, selected_pg, rid, transformation_collection_, border_option)

        newpg = integrate_chromatogram(selected_pg[0], current_run, swath_chromatograms,
                                       border_l, border_r, cnt, is_test)

        # integrate_chromatogram signals failure with the string "NA"
        if newpg != "NA":
            if VERBOSE:
                print("Managed to fill NA in run %s with value %s / borders %s %s" % (
                    current_run.get_id(), newpg.get_value("Intensity"), border_l, border_r))
            cnt.imputation_succ += 1
            transition_group_id = newpg.get_value("transition_group_id")
            try:
                peptide_label = newpg.get_value("peptide_group_label")
            except KeyError:
                # No explicit group label: fall back to the transition group id
                peptide_label = transition_group_id

            # Create new precursor
            precursor = GeneralPrecursor(transition_group_id, current_run)
            newpg.setClusterID(cluster_id)
            newpg.peptide = precursor
            precursor.add_peakgroup(newpg)
            precursor.sequence = selected_pg[0].peptide.sequence
            precursor.protein_name = selected_pg[0].peptide.protein_name

            if current_mpep.hasPrecursorGroup(current_run.get_id()):
                prec_group = current_mpep.getPrecursorGroup(current_run.get_id())
                if prec_group.getPrecursor(transition_group_id) is None:
                    # No precursors exists yet for this transition_group_id - this
                    # means that we have a new run for which a precursor group already
                    # exists (e.g. we have found a light version already) and are now
                    # dealing with the heavy ...
                    prec_group.addPrecursor(precursor)
                else:
                    # Likely, another peakgroup from the same run and peptide was
                    # picked already but not from the same cluster
                    prec_group.getPrecursor(transition_group_id).add_peakgroup(newpg)
            else:
                # Create new precursor group and insert
                precursor_group = PrecursorGroup(peptide_label, current_run)
                precursor_group.addPrecursor(precursor)
                current_mpep.insert(rid, precursor_group)