def initialize_from_data(self, reverse=False, smoother="lowess", force=False):
    """Create the transformations from the data in self.transformation_data.

    For each stored pair of runs (s_from, s_to), build a smoothing
    transformation: when pre-computed transformed data is available, an
    interpolation through it is used; otherwise a fresh smoother is fitted
    on the raw data pairs.

    Parameters
    ----------
    reverse : bool
        Also fit and register the inverse transformation (s_to -> s_from).
    smoother : str
        Smoother name handed to smoothing.getSmoothingObj.
    force : bool
        Unused here; kept for interface compatibility with callers.
    """
    # Hoisted out of the loop: the original re-executed `import time` on
    # every iteration of the outer loop.
    import time
    for s_from, darr in self.transformation_data.items():
        self.transformations[s_from] = {}
        for s_to, data in darr.items():
            start = time.time()
            # Cache the lookup: the original called getTransformedData()
            # up to three times with identical arguments.
            transformed = self.getTransformedData(s_from, s_to)
            if transformed is not None:
                # Pre-computed aligned data exists: interpolate through it.
                sm = smoothing.SmoothingInterpolation()
                sm.initialize(data[0], transformed)
                self._addTransformation(sm, s_from, s_to)
                if reverse:
                    sm_rev = smoothing.SmoothingInterpolation()
                    sm_rev.initialize(transformed, data[0])
                    self._addTransformation(sm_rev, s_to, s_from)
            else:
                # No pre-computed data: fit a smoother on the raw pairs.
                sm = smoothing.getSmoothingObj(smoother)
                sm.initialize(data[0], data[1])
                self.transformations[s_from][s_to] = sm
                if reverse:
                    sm_rev = smoothing.getSmoothingObj(smoother)
                    sm_rev.initialize(data[1], data[0])
                    self._addTransformation(sm_rev, s_to, s_from)
            print("Took %0.4fs to align %s against %s" % (time.time() - start, s_to, s_from))
def addDataToTrafo(tr_data, run_0, run_1, spl_aligner, multipeptides,
                   realign_method, max_rt_diff, topN=5, sd_max_data_length=1000):
    """Compute and store the RT transformations between two runs.

    Fits one smoother per direction (run_0 -> run_1 and run_1 -> run_0),
    estimates the alignment error as the standard deviation of residuals on
    a random subsample, and registers both transformations in tr_data.

    Parameters
    ----------
    tr_data : transformation collection supporting addTrafo() / addData()
    run_0, run_1 : run objects providing get_id()
    spl_aligner : aligner providing _getRTData()
    multipeptides : anchor-point peptides used for the alignment
    realign_method : str, smoother name for smoothing.getSmoothingObj
    max_rt_diff : maximal RT difference passed to the smoother
    topN : number of neighbors used by local smoothers
    sd_max_data_length : cap on the sample size for the error estimate;
        <= 0 disables error computation (stdev stays 0.0)
    """
    id_0 = run_0.get_id()
    id_1 = run_1.get_id()

    if id_0 == id_1:
        # A run against itself needs only the identity transformation.
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # Anchor-point data
    data_0, data_1 = spl_aligner._getRTData(run_0, run_1, multipeptides)
    tr_data.addData(id_0, data_0, id_1, data_1)

    if len(data_0) == 0:
        # No common anchor points: fall back to the identity transformation.
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # One smoother per direction
    sm_0_1 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff,
                                       min_rt_diff=0.1, removeOutliers=False,
                                       tmpdir=None)
    sm_1_0 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff,
                                       min_rt_diff=0.1, removeOutliers=False,
                                       tmpdir=None)
    sm_0_1.initialize(data_0, data_1)
    sm_1_0.initialize(data_1, data_0)

    # Compute error for alignment (standard deviation of residuals on a
    # random subsample capped at sd_max_data_length points).
    stdev_0_1 = 0.0
    stdev_1_0 = 0.0
    if sd_max_data_length > 0:
        # BUGFIX: xrange() does not exist in Python 3 (this function already
        # uses the print() function); range() works and is accepted by
        # random.sample since range is a sequence.
        sample_idx = random.sample(range(len(data_0)),
                                   min(sd_max_data_length, len(data_0)))
        data_0_s = [data_0[i] for i in sample_idx]
        data_1_s = [data_1[i] for i in sample_idx]
        data0_aligned = sm_0_1.predict(data_0_s)
        stdev_0_1 = numpy.std(numpy.array(data_1_s) - numpy.array(data0_aligned))
        data1_aligned = sm_1_0.predict(data_1_s)
        stdev_1_0 = numpy.std(numpy.array(data_0_s) - numpy.array(data1_aligned))
        print("stdev for", id_0, id_1, stdev_0_1, " / ", stdev_1_0,
              "on data length", len(data_0_s))

    # Register both transformations with their error estimates.
    tr_data.addTrafo(id_0, id_1, sm_0_1, stdev_0_1)
    tr_data.addTrafo(id_1, id_0, sm_1_0, stdev_1_0)
def initialize_from_data(self, reverse=False, smoother="lowess", force=False):
    """Create the transformations from the data in self.transformation_data.

    Builds, for every stored run pair, either an interpolation through the
    pre-computed transformed data (when available) or a newly fitted
    smoother on the raw data pairs.

    Parameters
    ----------
    reverse : bool
        Also fit and register the inverse transformation (s_to -> s_from).
    smoother : str
        Smoother name handed to smoothing.getSmoothingObj.
    force : bool
        Unused here; kept for interface compatibility with callers.
    """
    import time  # hoisted: the original re-imported inside the outer loop
    for s_from, darr in self.transformation_data.items():
        self.transformations[s_from] = {}
        for s_to, data in darr.items():
            start = time.time()
            # Single lookup instead of three identical calls.
            transformed = self.getTransformedData(s_from, s_to)
            if transformed is not None:
                sm = smoothing.SmoothingInterpolation()
                sm.initialize(data[0], transformed)
                self._addTransformation(sm, s_from, s_to)
                if reverse:
                    sm_rev = smoothing.SmoothingInterpolation()
                    sm_rev.initialize(transformed, data[0])
                    self._addTransformation(sm_rev, s_to, s_from)
            else:
                sm = smoothing.getSmoothingObj(smoother)
                sm.initialize(data[0], data[1])
                self.transformations[s_from][s_to] = sm
                if reverse:
                    sm_rev = smoothing.getSmoothingObj(smoother)
                    sm_rev.initialize(data[1], data[0])
                    self._addTransformation(sm_rev, s_to, s_from)
            print("Took %0.4fs to align %s against %s" % (time.time() - start, s_to, s_from))
def test_gettingOperator_obj(self):
    """ Test getting the correct smoothing operator (new interface) """
    # Each requested smoother name must map to the expected class.
    cases = [
        ("diRT", smoothing.SmoothingNull),
        ("None", smoothing.SmoothingNull),
        ("linear", smoothing.SmoothingLinear),
    ]
    for name, expected_cls in cases:
        obj = smoothing.getSmoothingObj(name)
        self.assertTrue(isinstance(obj, expected_cls))
def test_gettingOperator_obj(self):
    """ Test getting the correct smoothing operator (new interface) """
    # Each requested smoother name must map to the expected class.
    cases = [
        ("diRT", smoothing.SmoothingNull),
        ("None", smoothing.SmoothingNull),
        ("linear", smoothing.SmoothingLinear),
        ("splineR", smoothing.SmoothingR),
    ]
    for name, expected_cls in cases:
        obj = smoothing.getSmoothingObj(name)
        self.assertTrue(isinstance(obj, expected_cls))
def _spline_align_runs(self, bestrun, run, multipeptides):
    """Will align run against bestrun.

    Fits a smoother that maps the slave run's retention times onto the
    reference (master) run, stores the transformation and its residual
    error (stdev / median), and then rewrites the retention time of every
    peakgroup in the slave run with the aligned value.

    NOTE(review): this variant uses Python 2 print statements; a Python 3
    version of the same function exists elsewhere in this file.
    """
    sm = smoothing.getSmoothingObj(smoother=self.smoother, tmpdir=self.tmpdir_)
    # get those peptides we want to use for alignment => for this use the mapping
    # data1 = reference data (master)
    # data2 = data to be aligned (slave)
    data1, data2 = self._getRTData(bestrun, run, multipeptides)

    # Need at least 2 points to fit any transformation.
    if len(data2) < 2:
        print "No common identifications between %s and %s. Only found %s features below a cutoff of %s" % ( run.get_id(), bestrun.get_id(), len(data1), self.alignment_fdr_threshold_)
        print "If you ran the feature_alignment.py script, try to skip the re-alignment step (e.g. remove the --realign_runs option)."
        raise Exception("Not enough datapoints (less than 2 datapoints).")

    # Since we want to predict how to convert from slave to master, slave
    # is first and master is second.
    sm.initialize(data2, data1)
    data2_aligned = sm.predict(data2)

    # Store transformation in collection (from run to bestrun)
    self.transformation_collection.addTransformationData([data2, data1], run.get_id(), bestrun.get_id())
    self.transformation_collection.addTransformedData(data2_aligned, run.get_id(), bestrun.get_id())

    # Residual statistics of the fit (master minus aligned slave).
    stdev = numpy.std(numpy.array(data1) - numpy.array(data2_aligned))
    median = numpy.median(numpy.array(data1) - numpy.array(data2_aligned))
    print "Will align run %s against %s, using %s features" % ( run.get_id(), bestrun.get_id(), len(data1))
    print " Computed stdev", stdev, "and median", median

    # Store error for later
    d = self.transformation_error.transformations.get(run.get_id(), {})
    d[bestrun.get_id()] = [stdev, median]
    self.transformation_error.transformations[run.get_id()] = d

    # Now predict on _all_ data and write this back to the data.
    # First pass: collect every peakgroup RT in run order ...
    i = 0
    all_pg = []
    for prgr in run:
        for pep in prgr:
            all_pg.extend([(pg.get_normalized_retentiontime(), pg.get_feature_id()) for pg in pep.get_all_peakgroups()])
    rt_eval = [pg[0] for pg in all_pg]
    aligned_result = sm.predict(rt_eval)
    # ... second pass: write the aligned RTs back in the SAME order; the
    # shared index i relies on both passes iterating identically.
    for prgr in run:
        for pep in prgr:
            # TODO hack -> direct access to the internal peakgroups object
            mutable = [list(pg) for pg in pep.peakgroups_]
            for k in range(len(mutable)):
                # Position 2 of the peakgroup tuple holds the RT value here
                # -- presumably; verify against the peakgroup definition.
                mutable[k][2] = aligned_result[i]
                i += 1
            pep.peakgroups_ = [tuple(m) for m in mutable]
def test_gettingOperator_rpy2(self):
    """ Test getting the correct smoothing operator """
    # Both the legacy accessor and the new interface must yield an
    # R-based smoother.
    for operator in (smoothing.get_smooting_operator(),
                     smoothing.getSmoothingObj("splineR")):
        self.assertTrue(isinstance(operator, smoothing.SmoothingR))
def _spline_align_runs(self, bestrun, run, multipeptides):
    """Will align run against bestrun.

    Fits a smoother that maps the slave run's retention times onto the
    reference (master) run, stores the transformation and its residual
    error (stdev / median), and then rewrites the retention time of every
    peakgroup in the slave run with the aligned value.
    """
    sm = smoothing.getSmoothingObj(smoother = self.smoother, tmpdir = self.tmpdir_)
    # get those peptides we want to use for alignment => for this use the mapping
    # data1 = reference data (master)
    # data2 = data to be aligned (slave)
    data1,data2 = self._getRTData(bestrun, run, multipeptides)

    # Need at least 2 points to fit any transformation.
    if len(data2) < 2:
        print("No common identifications between %s and %s. Only found %s features below a cutoff of %s" % ( run.get_id(), bestrun.get_id(), len(data1), self.alignment_fdr_threshold_) )
        print("If you ran the feature_alignment.py script, try to skip the re-alignment step (e.g. remove the --realign_runs option)." )
        raise Exception("Not enough datapoints (less than 2 datapoints).")

    # Since we want to predict how to convert from slave to master, slave
    # is first and master is second.
    sm.initialize(data2, data1)
    data2_aligned = sm.predict(data2)

    # Store transformation in collection (from run to bestrun)
    self.transformation_collection.addTransformationData([data2, data1], run.get_id(), bestrun.get_id() )
    self.transformation_collection.addTransformedData(data2_aligned, run.get_id(), bestrun.get_id() )

    # Residual statistics of the fit (master minus aligned slave).
    stdev = numpy.std(numpy.array(data1) - numpy.array(data2_aligned))
    median = numpy.median(numpy.array(data1) - numpy.array(data2_aligned))
    print("Will align run %s against %s, using %s features" % (run.get_id(), bestrun.get_id(), len(data1)) )
    print(" Computed stdev", stdev, "and median", median )

    # Store error for later
    d = self.transformation_error.transformations.get(run.get_id(), {})
    d[bestrun.get_id()] = [stdev, median]
    self.transformation_error.transformations[ run.get_id() ] = d

    # Now predict on _all_ data and write this back to the data.
    # First pass: collect every peakgroup RT in run order ...
    i = 0
    all_pg = []
    for prgr in run:
        for pep in prgr:
            all_pg.extend( [ (pg.get_normalized_retentiontime(), pg.get_feature_id()) for pg in pep.get_all_peakgroups()] )
    rt_eval = [ pg[0] for pg in all_pg]
    aligned_result = sm.predict(rt_eval)
    # ... second pass: write the aligned RTs back in the SAME order; the
    # shared index i relies on both passes iterating identically.
    for prgr in run:
        for pep in prgr:
            # TODO hack -> direct access to the internal peakgroups object
            mutable = [list(pg) for pg in pep.peakgroups_]
            for k in range(len(mutable)):
                # Position 2 of the peakgroup tuple holds the RT value here
                # -- presumably; verify against the peakgroup definition.
                mutable[k][2] = aligned_result[i]
                i += 1
            pep.peakgroups_ = [ tuple(m) for m in mutable]
def addDataToTrafo(tr_data, run_0, run_1, spl_aligner, multipeptides,
                   realign_method, max_rt_diff, topN=5, sd_max_data_length=1000):
    """Compute and store the RT transformations between two runs.

    Fits one smoother per direction (run_0 -> run_1 and run_1 -> run_0),
    estimates the alignment error as the standard deviation of residuals on
    a random subsample, and registers both transformations in tr_data.

    NOTE(review): Python 2 variant (print statement, xrange); Python 3
    versions of this function exist elsewhere in this file.
    """
    id_0 = run_0.get_id()
    id_1 = run_1.get_id()

    # A run against itself needs only the identity transformation.
    if id_0 == id_1:
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # Data
    data_0, data_1 = spl_aligner._getRTData(run_0, run_1, multipeptides)
    tr_data.addData(id_0, data_0, id_1, data_1)

    # import pylab
    # pylab.scatter(data_0, data_1)
    # pylab.savefig('data_%s_%s.pdf' % (run_0, run_1) )
    # pylab.clf()
    # pylab.scatter(data_0, data_1)
    # pylab.xlim(2300, 2600)
    # pylab.ylim(2300, 2600)
    # pylab.savefig('data_%s_%s_zoom.pdf' % (run_0, run_1) )
    # pylab.clf()

    # No common anchor points: fall back to the identity transformation.
    if len(data_0) == 0:
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # Smoothers (one per direction)
    sm_0_1 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff,
                                       min_rt_diff=0.1, removeOutliers=False,
                                       tmpdir=None)
    sm_1_0 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff,
                                       min_rt_diff=0.1, removeOutliers=False,
                                       tmpdir=None)

    # Initialize smoother
    sm_0_1.initialize(data_0, data_1)
    sm_1_0.initialize(data_1, data_0)

    # Compute error for alignment (standard deviation) on a random
    # subsample capped at sd_max_data_length points; <= 0 disables this.
    stdev_0_1 = 0.0
    stdev_1_0 = 0.0
    if sd_max_data_length > 0:
        sample_idx = random.sample(xrange(len(data_0)), min(sd_max_data_length, len(data_0)))
        data_0_s = [data_0[i] for i in sample_idx]
        data_1_s = [data_1[i] for i in sample_idx]
        data0_aligned = sm_0_1.predict(data_0_s)
        stdev_0_1 = numpy.std( numpy.array(data_1_s) - numpy.array(data0_aligned))
        data1_aligned = sm_1_0.predict(data_1_s)
        stdev_1_0 = numpy.std( numpy.array(data_0_s) - numpy.array(data1_aligned))
        print "stdev for", id_0, id_1, stdev_0_1, " / ", stdev_1_0, "on data length", len( data_0_s)

    # Add data
    tr_data.addTrafo(id_0, id_1, sm_0_1, stdev_0_1)
    tr_data.addTrafo(id_1, id_0, sm_1_0, stdev_1_0)
def addDataToTrafo(tr_data, run_0, run_1, spl_aligner, multipeptides,
                   realign_method, max_rt_diff, topN=5, sd_max_data_length=5000,
                   force=False):
    """Compute and store the RT transformations between two runs.

    Fits one smoother per direction (run_0 -> run_1 and run_1 -> run_0),
    estimates the alignment error as the standard deviation of residuals on
    a random subsample, and registers both transformations in tr_data.

    Parameters
    ----------
    tr_data : transformation collection supporting addTrafo() / addData()
    run_0, run_1 : run objects providing get_id()
    spl_aligner : aligner providing _getRTData()
    multipeptides : anchor-point peptides used for the alignment
    realign_method : str, smoother name for smoothing.getSmoothingObj
    max_rt_diff : maximal RT difference passed to the smoother
    topN : number of neighbors used by local smoothers
    sd_max_data_length : cap on the sample size for the error estimate;
        <= 0 disables error computation (stdev stays 0.0)
    force : bool, fall back to an identity transformation instead of
        raising when no anchor-point data is available

    Raises
    ------
    Exception : when no data is available and force is False.
    """
    id_0 = run_0.get_id()
    id_1 = run_1.get_id()

    if id_0 == id_1:
        # A run against itself needs only the identity transformation.
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # Anchor-point data
    data_0, data_1 = spl_aligner._getRTData(run_0, run_1, multipeptides)
    tr_data.addData(id_0, data_0, id_1, data_1)

    if len(data_0) == 0:
        print("Warning, zero data!")
        if force:
            # Best-effort: register identity transformations and continue.
            null = smoothing.SmoothingNull()
            tr_data.addTrafo(id_0, id_1, null)
            tr_data.addTrafo(id_1, id_0, null)
            return
        else:
            raise Exception("No data available for alignment %s vs %s" % (id_0, id_1))

    # One smoother per direction
    sm_0_1 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff,
                                       min_rt_diff=0.1, removeOutliers=False,
                                       tmpdir=None)
    sm_1_0 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff,
                                       min_rt_diff=0.1, removeOutliers=False,
                                       tmpdir=None)
    sm_0_1.initialize(data_0, data_1)
    sm_1_0.initialize(data_1, data_0)

    # Compute error for alignment (standard deviation of residuals on a
    # random subsample capped at sd_max_data_length points).
    stdev_0_1 = 0.0
    stdev_1_0 = 0.0
    if sd_max_data_length > 0:
        # BUGFIX: xrange() does not exist in Python 3 (this function already
        # uses the print() function); range() works on both versions.
        sample_idx = random.sample(range(len(data_0)),
                                   min(sd_max_data_length, len(data_0)))
        data_0_s = [data_0[i] for i in sample_idx]
        data_1_s = [data_1[i] for i in sample_idx]
        data0_aligned = sm_0_1.predict(data_0_s)
        stdev_0_1 = numpy.std(numpy.array(data_1_s) - numpy.array(data0_aligned))
        data1_aligned = sm_1_0.predict(data_1_s)
        stdev_1_0 = numpy.std(numpy.array(data_0_s) - numpy.array(data1_aligned))
        print("stdev for", id_0, id_1, stdev_0_1, " / ", stdev_1_0,
              "on data length", len(data_0_s))

    # Add data and trafo description.
    # The CyLightTransformationData actually requires to get a specific type of
    # transformation, the CyLinearInterpolateWrapper which may not be directly
    # passed to this function. We will try to recover the underlying linear
    # wrapper and then stick it into the tr_data object. If this fails, we just
    # revert to the regular behavior.
    try:
        sm_0_1_lwp = sm_0_1.internal_interpolation.getLWP()
        sm_1_0_lwp = sm_1_0.internal_interpolation.getLWP()
        tr_data.addTrafo(id_0, id_1, sm_0_1_lwp, stdev_0_1)
        tr_data.addTrafo(id_1, id_0, sm_1_0_lwp, stdev_1_0)
    except Exception:
        tr_data.addTrafo(id_0, id_1, sm_0_1, stdev_0_1)
        tr_data.addTrafo(id_1, id_0, sm_1_0, stdev_1_0)
def addDataToTrafo(tr_data, run_0, run_1, spl_aligner, multipeptides,
                   realign_method, max_rt_diff, topN=5, sd_max_data_length=5000,
                   force=False):
    """Compute and store the RT transformations between two runs.

    Fits one smoother per direction (run_0 -> run_1 and run_1 -> run_0),
    estimates the alignment error as the standard deviation of residuals on
    a random subsample, and registers both transformations in tr_data.

    Parameters
    ----------
    tr_data : transformation collection supporting addTrafo() / addData()
    run_0, run_1 : run objects providing get_id()
    spl_aligner : aligner providing _getRTData()
    multipeptides : anchor-point peptides used for the alignment
    realign_method : str, smoother name for smoothing.getSmoothingObj
    max_rt_diff : maximal RT difference passed to the smoother
    topN : number of neighbors used by local smoothers
    sd_max_data_length : cap on the sample size for the error estimate;
        <= 0 disables error computation (stdev stays 0.0)
    force : bool, fall back to an identity transformation instead of
        raising when no anchor-point data is available

    Raises
    ------
    Exception : when no data is available and force is False.
    """
    id_0 = run_0.get_id()
    id_1 = run_1.get_id()

    if id_0 == id_1:
        # A run against itself needs only the identity transformation.
        null = smoothing.SmoothingNull()
        tr_data.addTrafo(id_0, id_1, null)
        tr_data.addTrafo(id_1, id_0, null)
        return

    # Anchor-point data
    data_0, data_1 = spl_aligner._getRTData(run_0, run_1, multipeptides)
    tr_data.addData(id_0, data_0, id_1, data_1)

    if len(data_0) == 0:
        print("Warning, zero data! Consider increasing the anchor point cutoff (--alignment_score) to include more peptides.")
        if force:
            # Best-effort: register identity transformations and continue.
            null = smoothing.SmoothingNull()
            tr_data.addTrafo(id_0, id_1, null)
            tr_data.addTrafo(id_1, id_0, null)
            return
        else:
            raise Exception("No data available for alignment %s vs %s" % (id_0, id_1) )

    # One smoother per direction
    sm_0_1 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff,
                                       min_rt_diff=0.1, removeOutliers=False,
                                       tmpdir=None)
    sm_1_0 = smoothing.getSmoothingObj(realign_method, topN=topN,
                                       max_rt_diff=max_rt_diff,
                                       min_rt_diff=0.1, removeOutliers=False,
                                       tmpdir=None)
    sm_0_1.initialize(data_0, data_1)
    sm_1_0.initialize(data_1, data_0)

    # Compute error for alignment (standard deviation of residuals on a
    # random subsample capped at sd_max_data_length points).
    stdev_0_1 = 0.0
    stdev_1_0 = 0.0
    if sd_max_data_length > 0:
        # BUGFIX: xrange() does not exist in Python 3 (this function already
        # uses the print() function); range() works on both versions.
        sample_idx = random.sample(range(len(data_0)),
                                   min(sd_max_data_length, len(data_0)))
        data_0_s = [data_0[i] for i in sample_idx]
        data_1_s = [data_1[i] for i in sample_idx]
        data0_aligned = sm_0_1.predict(data_0_s)
        stdev_0_1 = numpy.std(numpy.array(data_1_s) - numpy.array(data0_aligned))
        data1_aligned = sm_1_0.predict(data_1_s)
        stdev_1_0 = numpy.std(numpy.array(data_0_s) - numpy.array(data1_aligned))
        print("stdev for", id_0, id_1, stdev_0_1, " / ", stdev_1_0,
              "on data length", len(data_0_s))

    # Add data and trafo description.
    # The CyLightTransformationData actually requires to get a specific type of
    # transformation, the CyLinearInterpolateWrapper which may not be directly
    # passed to this function. We will try to recover the underlying linear
    # wrapper and then stick it into the tr_data object. If this fails, we just
    # revert to the regular behavior.
    try:
        sm_0_1_lwp = sm_0_1.internal_interpolation.getLWP()
        sm_1_0_lwp = sm_1_0.internal_interpolation.getLWP()
        tr_data.addTrafo(id_0, id_1, sm_0_1_lwp, stdev_0_1)
        tr_data.addTrafo(id_1, id_0, sm_1_0_lwp, stdev_1_0)
    except Exception:
        tr_data.addTrafo(id_0, id_1, sm_0_1, stdev_0_1)
        tr_data.addTrafo(id_1, id_0, sm_1_0, stdev_1_0)