示例#1
0
文件: mapper.py 项目: okulev/seal
    def __init__(self, ctx):
        """
        Build the mapper: configuration, logging, event monitor, BWA aligner,
        hit-processing chain, reference location and end of the input split.

        :param ctx: task context.  It is handed to the base constructor, the
            event monitor and the emitter link, and is queried for the job
            configuration and the input split.
        """
        # NOTE(review): super(type(self), self) recurses without end if this
        # class is ever subclassed; naming the class explicitly would avoid it.
        super(type(self), self).__init__(ctx)
        self.__get_configuration(ctx)
        logging.basicConfig(level=self.log_level)
        mapper_log = logging.getLogger("mapper")
        self.event_monitor = HadoopEventMonitor(self.COUNTER_CLASS,
                                                mapper_log, ctx)

        # Configure the aligner from attributes read off self (presumably
        # populated by __get_configuration -- confirm).
        aligner = BwaAligner()
        self.aligner = aligner
        aligner.event_monitor = self.event_monitor
        aligner.qformat = self.format
        aligner.max_isize = self.max_isize
        aligner.nthreads = self.nthreads
        aligner.trim_qual = self.trim_qual
        aligner.mmap_enabled = True

        # Hit processor chain: a filter followed by exactly one emitter,
        # chosen by the map-only flag.
        hit_chain = FilterLink(self.event_monitor)
        hit_chain.remove_unmapped = self.remove_unmapped
        hit_chain.min_hit_quality = self.min_hit_quality
        emitter_cls = EmitSamLink if self.__map_only else MarkDuplicatesEmitter
        hit_chain.set_next(emitter_cls(ctx, self.event_monitor))
        aligner.hit_visitor = hit_chain

        # Path to the reference index.
        job_conf = ctx.getJobConf()
        self.ref_archive = utils.get_ref_archive(job_conf)
        aligner.reference = self.get_reference_root(self.ref_archive)

        # Remember where the split ends; part of the workaround for
        # accumulating records, see #331.
        input_split = InputSplit(ctx.getInputSplit())
        self.split_end = input_split.offset + input_split.length
示例#2
0
 def setUp(self):
     """Create the monitor, a filter chained to a Receiver, and two mappings."""
     self.monitor = StandardMonitor(SavingLogger())
     self.filter = FilterLink(self.monitor)
     receiver_cls = type(self).Receiver
     # Whatever set_next returns is kept as the receiver under inspection.
     self.receiver = self.filter.set_next(receiver_cls())
     # Fixture pair, stored in self.pair and aliased as self.m1 / self.m2:
     #   m1: name = first,  tid = tid1, qual = 50
     #   m2: name = second, tid = tid2, qual = 30
     self.m1 = SimpleMapping()
     self.m2 = SimpleMapping()
     self.pair = [self.m1, self.m2]
     self.m1.set_name("first")
     self.m2.set_name("second")
     self.m1.tid = "tid1"
     self.m2.tid = "tid2"
     self.m1.qual = 50
     self.m2.qual = 30
示例#3
0
文件: mapper.py 项目: pinno/seal
    def __init__(self, ctx):
        """
        Build the mapper: configuration, logging, event monitor, BWA aligner,
        hit-processing chain, reference location and end of the input split.

        :param ctx: task context.  It is handed to the base constructor, the
            event monitor and the emitter link, and is queried for the job
            configuration and the input split.
        """
        # NOTE(review): super(type(self), self) recurses without end if this
        # class is ever subclassed; naming the class explicitly would avoid it.
        super(type(self), self).__init__(ctx)
        self.__get_configuration(ctx)
        logging.basicConfig(level=self.log_level)
        mapper_log = logging.getLogger("mapper")
        self.event_monitor = HadoopEventMonitor(self.COUNTER_CLASS,
                                                mapper_log, ctx)

        # Configure the aligner from attributes read off self (presumably
        # populated by __get_configuration -- confirm).
        aligner = BwaAligner()
        self.aligner = aligner
        aligner.event_monitor = self.event_monitor
        aligner.qformat = self.format
        aligner.max_isize = self.max_isize
        aligner.nthreads = self.nthreads
        aligner.trim_qual = self.trim_qual
        aligner.mmap_enabled = True

        # Hit processor chain: a filter followed by exactly one emitter,
        # chosen by the map-only flag.
        hit_chain = FilterLink(self.event_monitor)
        hit_chain.remove_unmapped = self.remove_unmapped
        hit_chain.min_hit_quality = self.min_hit_quality
        emitter_cls = EmitSamLink if self.__map_only else MarkDuplicatesEmitter
        hit_chain.set_next(emitter_cls(ctx, self.event_monitor))
        aligner.hit_visitor = hit_chain

        # Path to the reference index.
        job_conf = ctx.getJobConf()
        self.ref_archive = utils.get_ref_archive(job_conf)
        aligner.reference = self.get_reference_root(self.ref_archive)

        # Remember where the split ends; part of the workaround for
        # accumulating records, see #331.
        input_split = InputSplit(ctx.getInputSplit())
        self.split_end = input_split.offset + input_split.length
	def setUp(self):
		"""Create the monitor, a filter chained to a Receiver, and two mappings."""
		self.monitor = StandardMonitor(SavingLogger())
		self.filter = FilterLink(self.monitor)
		receiver_cls = type(self).Receiver
		# Whatever set_next returns is kept as the receiver under inspection.
		self.receiver = self.filter.set_next(receiver_cls())
		# Fixture pair, stored in self.pair and aliased as self.m1 / self.m2:
		#   m1: name = first,  tid = tid1, qual = 50
		#   m2: name = second, tid = tid2, qual = 30
		self.m1 = SimpleMapping()
		self.m2 = SimpleMapping()
		self.pair = [self.m1, self.m2]
		self.m1.set_name("first")
		self.m2.set_name("second")
		self.m1.tid = "tid1"
		self.m2.tid = "tid2"
		self.m1.qual = 50
		self.m2.qual = 30
示例#5
0
 def test_without_next_link(self):
     # A filter with no successor must process a pair without raising.
     lone_link = FilterLink(self.monitor)
     lone_link.process(self.pair)
示例#6
0
 def test_constructor_link(self):
     # Built without a successor, the link exposes next_link as None.
     unchained = FilterLink(self.monitor)
     self.assertTrue(unchained.next_link is None)
     # A successor passed to the constructor is exposed as next_link.
     successor = HitProcessorChainLink()
     chained = FilterLink(self.monitor, successor)
     self.assertEqual(successor, chained.next_link)
示例#7
0
class TestFilterLink(unittest.TestCase):
    """Unit tests for FilterLink forwarding and filtering of mapping pairs."""

    class Receiver(HitProcessorChainLink):
        """Mini link that lets us peek at what the filter forwards."""

        def __init__(self, *args):
            # Name the class explicitly: super(type(self), self) would recurse
            # without end as soon as Receiver is subclassed.
            super(TestFilterLink.Receiver, self).__init__(*args)
            self.received = None

        def process(self, pair):
            # Keep the last pair handed over so tests can inspect it.
            self.received = pair

    def setUp(self):
        """Create the monitor, a filter chained to a Receiver, and two mappings."""
        self.monitor = StandardMonitor(SavingLogger())
        self.filter = FilterLink(self.monitor)
        self.receiver = self.filter.set_next(type(self).Receiver())
        # Fixture pair, stored in self.pair and aliased as self.m1 / self.m2:
        #   m1: name = first,  tid = tid1, qual = 50
        #   m2: name = second, tid = tid2, qual = 30
        self.pair = [SimpleMapping(), SimpleMapping()]
        self.m1, self.m2 = self.pair
        self.m1.set_name("first")
        self.m2.set_name("second")
        self.m1.tid = "tid1"
        self.m2.tid = "tid2"
        self.m1.qual = 50
        self.m2.qual = 30

    def _counters(self):
        """Return the monitor's counters as a list of (name, value) items."""
        return list(self.monitor.each_counter())

    def _assert_single_counter(self, expected_name, expected_value):
        """Assert the monitor holds exactly one counter with this name/value."""
        counters = self._counters()
        self.assertEqual(1, len(counters))
        name, value = counters[0]
        self.assertEqual(expected_name, name)
        self.assertEqual(expected_value, value)

    def test_constructor_link(self):
        h = FilterLink(self.monitor)
        self.assertTrue(h.next_link is None)
        other = HitProcessorChainLink()
        h = FilterLink(self.monitor, other)
        self.assertEqual(other, h.next_link)

    def test_filter_none(self):
        self.filter.process(self.pair)
        self.assertFalse(self.receiver.received is None)
        self.assertEqual(self.m1.get_name(),
                         self.receiver.received[0].get_name())
        self.assertEqual(self.m2.get_name(),
                         self.receiver.received[1].get_name())
        # ensure there are no counters (i.e. nothing was filtered)
        self.assertEqual([], self._counters())

    def test_filter_one(self):
        # Threshold just above m2's quality: only m2 is expected to be dropped.
        self.filter.min_hit_quality = self.m2.qual + 1
        self.filter.process(self.pair)
        self.assertFalse(self.receiver.received is None)
        self.assertTrue(self.receiver.received[1] is None)
        self.assertEqual(self.m1.get_name(),
                         self.receiver.received[0].get_name())
        self._assert_single_counter("reads filtered: low quality", 1)

    def test_filter_two(self):
        # Threshold above both qualities: nothing should reach the receiver.
        self.filter.min_hit_quality = self.m1.qual + 1
        self.filter.process(self.pair)
        self.assertTrue(self.receiver.received is None)
        self._assert_single_counter("reads filtered: low quality", 2)

    def test_without_next_link(self):
        h = FilterLink(self.monitor)
        h.process(self.pair)  # shouldn't raise

    def test_filter_unmapped_1(self):
        self.m1.set_mapped(False)
        self.m1.qual = 0
        self.filter.process(self.pair)
        # Fail with an assertion, not a TypeError, if nothing was forwarded.
        self.assertFalse(self.receiver.received is None)
        self.assertTrue(self.receiver.received[0] is None)
        self.assertFalse(self.receiver.received[1] is None)
        self._assert_single_counter("reads filtered: unmapped", 1)
	def test_without_next_link(self):
		# A filter with no successor must process a pair without raising.
		lone_link = FilterLink(self.monitor)
		lone_link.process(self.pair)
class TestFilterLink(unittest.TestCase):
    """Unit tests for FilterLink forwarding and filtering of mapping pairs."""

    class Receiver(HitProcessorChainLink):
        """Mini link that lets us peek at what the filter forwards."""

        def __init__(self, *args):
            # Name the class explicitly: super(type(self), self) would recurse
            # without end as soon as Receiver is subclassed.
            super(TestFilterLink.Receiver, self).__init__(*args)
            self.received = None

        def process(self, pair):
            # Keep the last pair handed over so tests can inspect it.
            self.received = pair

    def setUp(self):
        """Create the monitor, a filter chained to a Receiver, and two mappings."""
        self.monitor = StandardMonitor(SavingLogger())
        self.filter = FilterLink(self.monitor)
        self.receiver = self.filter.set_next(type(self).Receiver())
        # Fixture pair, stored in self.pair and aliased as self.m1 / self.m2:
        #   m1: name = first,  tid = tid1, qual = 50
        #   m2: name = second, tid = tid2, qual = 30
        self.pair = [SimpleMapping(), SimpleMapping()]
        self.m1, self.m2 = self.pair
        self.m1.set_name("first")
        self.m2.set_name("second")
        self.m1.tid = "tid1"
        self.m2.tid = "tid2"
        self.m1.qual = 50
        self.m2.qual = 30

    def _counters(self):
        """Return the monitor's counters as a list of (name, value) items."""
        return list(self.monitor.each_counter())

    def _assert_single_counter(self, expected_name, expected_value):
        """Assert the monitor holds exactly one counter with this name/value."""
        counters = self._counters()
        self.assertEqual(1, len(counters))
        name, value = counters[0]
        self.assertEqual(expected_name, name)
        self.assertEqual(expected_value, value)

    def test_constructor_link(self):
        h = FilterLink(self.monitor)
        self.assertTrue(h.next_link is None)
        other = HitProcessorChainLink()
        h = FilterLink(self.monitor, other)
        self.assertEqual(other, h.next_link)

    def test_filter_none(self):
        self.filter.process(self.pair)
        self.assertFalse(self.receiver.received is None)
        self.assertEqual(self.m1.get_name(),
                         self.receiver.received[0].get_name())
        self.assertEqual(self.m2.get_name(),
                         self.receiver.received[1].get_name())
        # ensure there are no counters (i.e. nothing was filtered)
        self.assertEqual([], self._counters())

    def test_filter_one(self):
        # Threshold just above m2's quality: only m2 is expected to be dropped.
        self.filter.min_hit_quality = self.m2.qual + 1
        self.filter.process(self.pair)
        self.assertFalse(self.receiver.received is None)
        self.assertTrue(self.receiver.received[1] is None)
        self.assertEqual(self.m1.get_name(),
                         self.receiver.received[0].get_name())
        self._assert_single_counter("reads filtered: low quality", 1)

    def test_filter_two(self):
        # Threshold above both qualities: nothing should reach the receiver.
        self.filter.min_hit_quality = self.m1.qual + 1
        self.filter.process(self.pair)
        self.assertTrue(self.receiver.received is None)
        self._assert_single_counter("reads filtered: low quality", 2)

    def test_without_next_link(self):
        h = FilterLink(self.monitor)
        h.process(self.pair)  # shouldn't raise

    def test_filter_unmapped_1(self):
        self.m1.set_mapped(False)
        self.m1.qual = 0
        self.filter.process(self.pair)
        # Fail with an assertion, not a TypeError, if nothing was forwarded.
        self.assertFalse(self.receiver.received is None)
        self.assertTrue(self.receiver.received[0] is None)
        self.assertFalse(self.receiver.received[1] is None)
        self._assert_single_counter("reads filtered: unmapped", 1)