def __init__(self, ctx):
    """Configure the mapper from the task context.

    Reads the job configuration, sets up logging and the event monitor,
    creates the BWA aligner with its hit-processing chain, points the
    aligner at the reference index and records where the input split ends.
    """
    # NOTE(review): super(type(self), self) recurses forever if this class
    # is ever subclassed; naming the class explicitly would avoid that, but
    # the class header is not visible here, so the call is kept as is.
    super(type(self), self).__init__(ctx)
    self.__get_configuration(ctx)
    logging.basicConfig(level=self.log_level)

    monitor = HadoopEventMonitor(
        self.COUNTER_CLASS, logging.getLogger("mapper"), ctx)
    self.event_monitor = monitor

    # ---- aligner set-up
    aligner = BwaAligner()
    self.aligner = aligner
    aligner.event_monitor = monitor
    aligner.qformat = self.format
    aligner.max_isize = self.max_isize
    aligner.nthreads = self.nthreads
    aligner.trim_qual = self.trim_qual
    aligner.mmap_enabled = True

    # ---- hit processor chain: a filter first, then the emitter chosen by
    # the map-only setting
    hit_chain = FilterLink(monitor)
    hit_chain.remove_unmapped = self.remove_unmapped
    hit_chain.min_hit_quality = self.min_hit_quality
    emitter_cls = EmitSamLink if self.__map_only else MarkDuplicatesEmitter
    hit_chain.set_next(emitter_cls(ctx, monitor))
    aligner.hit_visitor = hit_chain

    # ---- path to the reference index
    self.ref_archive = utils.get_ref_archive(ctx.getJobConf())
    aligner.reference = self.get_reference_root(self.ref_archive)

    # part of the code is a workaround for accumulating records, see #331
    split = InputSplit(ctx.getInputSplit())
    self.split_end = split.offset + split.length
def setUp(self):
    # Fresh monitor, filter and recording receiver for every test; the
    # receiver is whatever set_next() hands back for the new Receiver.
    self.monitor = StandardMonitor(SavingLogger())
    self.filter = FilterLink(self.monitor)
    receiver_cls = type(self).Receiver
    self.receiver = self.filter.set_next(receiver_cls())

    # Two mappings, kept together in self.pair:
    #   m1: name = first,  tid = tid1, qual = 50
    #   m2: name = second, tid = tid2, qual = 30
    first, second = SimpleMapping(), SimpleMapping()
    self.pair = [first, second]
    self.m1, self.m2 = self.pair

    first.set_name("first")
    second.set_name("second")

    first.tid = "tid1"
    second.tid = "tid2"

    first.qual = 50
    second.qual = 30
def __init__(self, ctx):
    """Build the mapper state from the task context.

    Loads the job configuration, initialises logging and the event
    monitor, prepares the BWA aligner and the chain that processes its
    hits, resolves the reference index and stores the input split end.
    """
    # NOTE(review): super(type(self), self) loops endlessly once this class
    # gets a subclass; an explicit class name would fix it, but the class
    # header is outside this view, so the call is left unchanged.
    super(type(self), self).__init__(ctx)
    self.__get_configuration(ctx)
    logging.basicConfig(level=self.log_level)

    mapper_log = logging.getLogger("mapper")
    monitor = HadoopEventMonitor(self.COUNTER_CLASS, mapper_log, ctx)
    self.event_monitor = monitor

    # aligner parameters come straight from the attributes set on self
    bwa = BwaAligner()
    self.aligner = bwa
    bwa.event_monitor = monitor
    bwa.qformat = self.format
    bwa.max_isize = self.max_isize
    bwa.nthreads = self.nthreads
    bwa.trim_qual = self.trim_qual
    bwa.mmap_enabled = True

    # assemble hit processor chain: filter -> SAM emitter (map-only) or
    # duplicate-marking emitter
    head = FilterLink(monitor)
    head.remove_unmapped = self.remove_unmapped
    head.min_hit_quality = self.min_hit_quality
    tail_cls = EmitSamLink if self.__map_only else MarkDuplicatesEmitter
    head.set_next(tail_cls(ctx, monitor))
    bwa.hit_visitor = head

    # set the path to the reference index
    self.ref_archive = utils.get_ref_archive(ctx.getJobConf())
    bwa.reference = self.get_reference_root(self.ref_archive)

    # part of the code is a workaround for accumulating records, see #331
    input_split = InputSplit(ctx.getInputSplit())
    self.split_end = input_split.offset + input_split.length
def test_without_next_link(self):
    # A filter that was never given a successor must still accept a pair:
    # the call below shouldn't raise.
    lone_filter = FilterLink(self.monitor)
    lone_filter.process(self.pair)
def test_constructor_link(self):
    # built from a monitor alone, the filter has no next link
    unlinked = FilterLink(self.monitor)
    self.assertTrue(unlinked.next_link is None)

    # a link passed as second constructor argument becomes the next link
    successor = HitProcessorChainLink()
    linked = FilterLink(self.monitor, successor)
    self.assertEqual(successor, linked.next_link)
class TestFilterLink(unittest.TestCase):
    """Tests for FilterLink, observed through a stub link placed after it."""

    class Receiver(HitProcessorChainLink):
        """Mini object to let us peek at what the filter forwards to the next link."""

        def __init__(self, *args):
            # Name the class explicitly: super(type(self), self) would
            # recurse forever if Receiver were ever subclassed.
            super(TestFilterLink.Receiver, self).__init__(*args)
            self.received = None  # last pair handed to process(), if any

        def process(self, pair):
            self.received = pair

    def setUp(self):
        self.monitor = StandardMonitor(SavingLogger())
        self.filter = FilterLink(self.monitor)
        self.receiver = self.filter.set_next(type(self).Receiver())
        # create two mappings, m1, m2.  We put them in self.pair
        # m1 has:
        #   name = first
        #   tid = tid1
        #   qual = 50
        # m2 has:
        #   name = second
        #   tid = tid2
        #   qual = 30
        self.pair = [SimpleMapping(), SimpleMapping()]
        self.m1, self.m2 = self.pair
        self.m1.set_name("first")
        self.m2.set_name("second")
        self.m1.tid = "tid1"
        self.m2.tid = "tid2"
        self.m1.qual = 50
        self.m2.qual = 30

    def _assert_single_counter(self, expected_name, expected_value):
        # The monitor must hold exactly one counter with this name and value.
        # assertEqual (rather than assertTrue on a comparison) reports the
        # actual count when the check fails.
        counters = list(self.monitor.each_counter())
        self.assertEqual(1, len(counters))
        name, value = counters[0]
        self.assertEqual(expected_name, name)
        self.assertEqual(expected_value, value)

    def test_constructor_link(self):
        h = FilterLink(self.monitor)
        self.assertTrue(h.next_link is None)
        other = HitProcessorChainLink()
        h = FilterLink(self.monitor, other)
        self.assertEqual(other, h.next_link)

    def test_filter_none(self):
        self.filter.process(self.pair)
        self.assertFalse(self.receiver.received is None)
        self.assertEqual(self.m1.get_name(), self.receiver.received[0].get_name())
        self.assertEqual(self.m2.get_name(), self.receiver.received[1].get_name())
        # ensure there are no counters (i.e. nothing was filtered)
        self.assertEqual([], list(self.monitor.each_counter()))

    def test_filter_one(self):
        # threshold just above m2's quality: only m2 is expected to be dropped
        self.filter.min_hit_quality = self.m2.qual + 1
        self.filter.process(self.pair)
        self.assertFalse(self.receiver.received is None)
        self.assertTrue(self.receiver.received[1] is None)
        self.assertEqual(self.m1.get_name(), self.receiver.received[0].get_name())
        self._assert_single_counter("reads filtered: low quality", 1)

    def test_filter_two(self):
        # threshold above both qualities: nothing should reach the receiver
        self.filter.min_hit_quality = self.m1.qual + 1
        self.filter.process(self.pair)
        self.assertTrue(self.receiver.received is None)
        self._assert_single_counter("reads filtered: low quality", 2)

    def test_without_next_link(self):
        h = FilterLink(self.monitor)
        h.process(self.pair)  # shouldn't raise

    def test_filter_unmapped_1(self):
        self.m1.set_mapped(False)
        self.m1.qual = 0
        self.filter.process(self.pair)
        self.assertTrue(self.receiver.received[0] is None)
        self.assertFalse(self.receiver.received[1] is None)
        self._assert_single_counter("reads filtered: unmapped", 1)
class TestFilterLink(unittest.TestCase):
    """Tests for FilterLink, using a recording link as the filter's successor."""

    class Receiver(HitProcessorChainLink):
        """Mini object to let us peek at what the filter forwards to the next link."""

        def __init__(self, *args):
            # Refer to the class by name: with super(type(self), self) any
            # subclass of Receiver would recurse endlessly in this call.
            super(TestFilterLink.Receiver, self).__init__(*args)
            self.received = None  # set by process() to the forwarded pair

        def process(self, pair):
            self.received = pair

    def setUp(self):
        self.monitor = StandardMonitor(SavingLogger())
        self.filter = FilterLink(self.monitor)
        self.receiver = self.filter.set_next(type(self).Receiver())
        # create two mappings, m1, m2.  We put them in self.pair
        # m1 has:
        #   name = first
        #   tid = tid1
        #   qual = 50
        # m2 has:
        #   name = second
        #   tid = tid2
        #   qual = 30
        self.pair = [SimpleMapping(), SimpleMapping()]
        self.m1, self.m2 = self.pair
        self.m1.set_name("first")
        self.m2.set_name("second")
        self.m1.tid = "tid1"
        self.m2.tid = "tid2"
        self.m1.qual = 50
        self.m2.qual = 30

    def _assert_single_counter(self, expected_name, expected_value):
        # Check that the monitor recorded exactly one counter and that it
        # carries the given name and value.  assertEqual shows the actual
        # number of counters on failure, unlike assertTrue(len(...) == 1).
        counter_list = list(self.monitor.each_counter())
        self.assertEqual(1, len(counter_list))
        name, value = counter_list[0]
        self.assertEqual(expected_name, name)
        self.assertEqual(expected_value, value)

    def test_constructor_link(self):
        h = FilterLink(self.monitor)
        self.assertTrue(h.next_link is None)
        other = HitProcessorChainLink()
        h = FilterLink(self.monitor, other)
        self.assertEqual(other, h.next_link)

    def test_filter_none(self):
        self.filter.process(self.pair)
        self.assertFalse(self.receiver.received is None)
        self.assertEqual(self.m1.get_name(), self.receiver.received[0].get_name())
        self.assertEqual(self.m2.get_name(), self.receiver.received[1].get_name())
        # ensure there are no counters (i.e. nothing was filtered)
        self.assertEqual([], list(self.monitor.each_counter()))

    def test_filter_one(self):
        # minimum quality sits between the two mappings' qualities, so the
        # test expects only the second slot to be blanked
        self.filter.min_hit_quality = self.m2.qual + 1
        self.filter.process(self.pair)
        self.assertFalse(self.receiver.received is None)
        self.assertTrue(self.receiver.received[1] is None)
        self.assertEqual(self.m1.get_name(), self.receiver.received[0].get_name())
        self._assert_single_counter("reads filtered: low quality", 1)

    def test_filter_two(self):
        # minimum quality exceeds both mappings: the receiver must see nothing
        self.filter.min_hit_quality = self.m1.qual + 1
        self.filter.process(self.pair)
        self.assertTrue(self.receiver.received is None)
        self._assert_single_counter("reads filtered: low quality", 2)

    def test_without_next_link(self):
        h = FilterLink(self.monitor)
        h.process(self.pair)  # shouldn't raise

    def test_filter_unmapped_1(self):
        self.m1.set_mapped(False)
        self.m1.qual = 0
        self.filter.process(self.pair)
        self.assertTrue(self.receiver.received[0] is None)
        self.assertFalse(self.receiver.received[1] is None)
        self._assert_single_counter("reads filtered: unmapped", 1)