def test_fragment_with_duplicate_in_pair_1(self):
    """Ensure the reducer catches a fragment duplicate of pair[0].

    A full pair and an unpaired copy of read 1 share the same key; with
    duplicate discarding on (the default), only the pair survives.
    """
    p = list(test_utils.pair1())
    self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
    # Assign the return value, consistent with the sibling tests.  The
    # original discarded it, which only works if erase_read2 also mutates
    # the list in place -- NOTE(review): siblings show it returns the list.
    p = test_utils.erase_read2(p)
    self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
    self.__reducer.reduce(self.__ctx)
    # now ensure that the pair was emitted, but not the fragment
    self.__ensure_only_pair1_emitted()
    self.assertEqual(1, len(self.__ctx.emitted.keys()))
    # two SAM records associated with the key (for the pair); list() keeps
    # the subscript valid on Python 3 dict views
    self.assertEqual(2, len(list(self.__ctx.emitted.values())[0]))
    # check counters: only the fragment-duplicate counter was incremented
    # ('in' replaces dict.has_key, which Python 3 removed)
    self.assertNotIn(self.__pair_counter_name(), self.__ctx.counters)
    self.assertIn(self.__frag_counter_name(), self.__ctx.counters)
    self.assertEqual(1, self.__ctx.counters[self.__frag_counter_name()])
def test_fragment_with_duplicate_in_pair_1_no_discard(self):
    """Ensure the reducer catches a fragment duplicate of pair[0].

    With discard_duplicates disabled, the duplicate fragment must still be
    emitted, but flagged as a duplicate in its SAM flag field.
    """
    p = list(test_utils.pair1())
    self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
    p = test_utils.erase_read2(p)
    self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
    self.__reducer.discard_duplicates = False
    self.__reducer.reduce(self.__ctx)
    # now ensure that both were emitted, but the fragment is marked as duplicate
    self.__ensure_pair1_emitted()
    self.assertEqual(1, len(self.__ctx.emitted.keys()))
    # 3 SAM records associated with the key (two for the pair, one fragment);
    # list() keeps the subscript valid on Python 3 dict views
    emitted_records = list(self.__ctx.emitted.values())[0]
    self.assertEqual(3, len(emitted_records))
    # make sure we have a read with the duplicate flag set; raw string so
    # \d and \s are not treated as invalid string escapes
    regexp = r"(\d+)\s+.*"
    flags = [int(re.match(regexp, value).group(1)) for value in emitted_records]
    dup_flags = [flag for flag in flags if flag & sam_flags.SAM_FDP]
    self.assertEqual(1, len(dup_flags))
    f = dup_flags[0]
    self.assertTrue(f & sam_flags.SAM_FR1 > 0)   # ensure the duplicate read is r1
    self.assertTrue(f & sam_flags.SAM_FPD == 0)  # ensure the duplicate read is unpaired
    # check counter ('in' replaces dict.has_key, which Python 3 removed)
    self.assertNotIn(self.__pair_counter_name(), self.__ctx.counters)
    self.assertIn(self.__frag_counter_name(), self.__ctx.counters)
    self.assertEqual(1, self.__ctx.counters[self.__frag_counter_name()])
def test_duplicate_fragments_read1_no_discard(self):
    """Two identical unpaired reads: with discard disabled, both are emitted
    and exactly one carries the SAM duplicate flag.
    """
    # load pair 1 and erase its second read
    p = list(test_utils.pair1())
    p = test_utils.erase_read2(p)
    p0 = p[0]
    # insert the pair into the context, twice
    self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
    self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
    self.__reducer.discard_duplicates = False
    self.__reducer.reduce(self.__ctx)
    self.assertEqual(1, len(self.__ctx.emitted.keys()))
    # Two SAM records associated with the key; list() keeps the subscripts
    # valid on Python 3 dict views
    emitted_records = list(self.__ctx.emitted.values())[0]
    self.assertEqual(2, len(emitted_records))
    short_name = p0.get_name()[0:-2]
    self.assertEqual(short_name, list(self.__ctx.emitted.keys())[0])
    # leading integer of each record is its SAM flag field; comprehensions
    # replace map/filter so len() works on Python 3, and group(1) replaces
    # the obscure int(*m.groups(1)) spelling
    flags = [int(re.match(r"(\d+).*", sam).group(1)) for sam in emitted_records]
    # ensure we have one marked as duplicate
    self.assertEqual(1, len([flag for flag in flags if flag & sam_flags.SAM_FDP]))
    # and ensure we have one NOT marked as duplicate
    self.assertEqual(1, len([flag for flag in flags if flag & sam_flags.SAM_FDP == 0]))
    # check counter ('in' replaces dict.has_key, which Python 3 removed)
    self.assertNotIn(self.__pair_counter_name(), self.__ctx.counters)
    self.assertIn(self.__frag_counter_name(), self.__ctx.counters)
    self.assertEqual(1, self.__ctx.counters[self.__frag_counter_name()])
def test_duplicate_fragments_read1(self):
    """Two identical unpaired reads: with discard on (the default), only
    one SAM record is emitted and the fragment-duplicate counter is bumped.
    """
    # load pair 1 and erase its second read
    p = list(test_utils.pair1())
    p = test_utils.erase_read2(p)
    p0 = p[0]
    # insert the pair into the context, twice
    self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
    self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
    self.__reducer.reduce(self.__ctx)
    self.assertEqual(1, len(self.__ctx.emitted.keys()))
    # only one SAM record associated with the key; list() keeps the
    # subscripts valid on Python 3 dict views
    self.assertEqual(1, len(list(self.__ctx.emitted.values())[0]))
    short_name = p0.get_name()[0:-2]
    self.assertEqual(short_name, list(self.__ctx.emitted.keys())[0])
    # raw string so \d and \s are not treated as invalid string escapes
    self.assertTrue(
        re.match(r"\d+\s+%s\s+%d\s+.*" % (p0.tid, p0.pos),
                 self.__ctx.emitted[short_name][0]))
    # check counter ('in' replaces dict.has_key, which Python 3 removed)
    self.assertNotIn(self.__pair_counter_name(), self.__ctx.counters)
    self.assertIn(self.__frag_counter_name(), self.__ctx.counters)
    self.assertEqual(1, self.__ctx.counters[self.__frag_counter_name()])
def test_fragment_with_duplicate_in_pair_1(self):
    """Ensure the reducer catches a fragment duplicate of pair[0].

    A full pair and an unpaired copy of read 1 share the same key; with
    duplicate discarding on (the default), only the pair survives.
    """
    p = list(test_utils.pair1())
    self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
    # Assign the return value, consistent with the sibling tests.  The
    # original discarded it, which only works if erase_read2 also mutates
    # the list in place -- NOTE(review): siblings show it returns the list.
    p = test_utils.erase_read2(p)
    self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
    self.__reducer.reduce(self.__ctx)
    # now ensure that the pair was emitted, but not the fragment
    self.__ensure_only_pair1_emitted()
    self.assertEqual(1, len(self.__ctx.emitted.keys()))
    # two SAM records associated with the key (for the pair); list() keeps
    # the subscript valid on Python 3 dict views
    self.assertEqual(2, len(list(self.__ctx.emitted.values())[0]))
    # check counters: only the fragment-duplicate counter was incremented
    # ('in' replaces dict.has_key, which Python 3 removed)
    self.assertNotIn(self.__pair_counter_name(), self.__ctx.counters)
    self.assertIn(self.__frag_counter_name(), self.__ctx.counters)
    self.assertEqual(1, self.__ctx.counters[self.__frag_counter_name()])
def test_emit_forward_fragment2(self):
    """Fragment in pair[0], None in pair[1].

    After erasing read 2, processing the pair must emit one value under the
    key of read 1, unserializing back to (read1, None) with coordinates
    intact, and bump the mapped-coordinates counter once.
    """
    self.pair1 = test_utils.erase_read2(list(self.pair1))
    self.link.process(self.pair1)
    self.assertEqual(1, len(self.ctx.emitted.keys()))
    expected_key = test_utils.make_key(self.pair1[0])
    self.assertEqual(1, len(self.ctx.emitted[expected_key]))
    unserialized = proto.unserialize_pair(self.ctx.emitted[expected_key][0])
    # the erased mate must be gone; the surviving read keeps its mapping
    self.assertTrue(unserialized[1] is None)
    self.assertEqual(self.pair1[0].tid, unserialized[0].tid)
    self.assertEqual(self.pair1[0].pos, unserialized[0].pos)
    # 'in' replaces dict.has_key, which Python 3 removed
    self.assertIn("Test:MAPPED COORDINATES", self.ctx.counters)
    self.assertEqual(1, self.ctx.counters["Test:MAPPED COORDINATES"])
def test_emit_forward_fragment2(self):
    """Fragment in pair[0], None in pair[1].

    After erasing read 2, processing the pair must emit one value under the
    key of read 1, unserializing back to (read1, None) with coordinates
    intact, and bump the mapped-coordinates counter once.
    """
    self.pair1 = test_utils.erase_read2(list(self.pair1))
    self.link.process(self.pair1)
    self.assertEqual(1, len(self.ctx.emitted.keys()))
    expected_key = test_utils.make_key(self.pair1[0])
    self.assertEqual(1, len(self.ctx.emitted[expected_key]))
    unserialized = proto.unserialize_pair(self.ctx.emitted[expected_key][0])
    # the erased mate must be gone; the surviving read keeps its mapping
    self.assertTrue(unserialized[1] is None)
    self.assertEqual(self.pair1[0].tid, unserialized[0].tid)
    self.assertEqual(self.pair1[0].pos, unserialized[0].pos)
    # 'in' replaces dict.has_key, which Python 3 removed
    self.assertIn("Test:MAPPED COORDINATES", self.ctx.counters)
    self.assertEqual(1, self.ctx.counters["Test:MAPPED COORDINATES"])
def test_duplicate_fragments_read1(self):
    """Two identical unpaired reads: with discard on (the default), only
    one SAM record is emitted and the fragment-duplicate counter is bumped.
    """
    # load pair 1 and erase its second read
    p = list(test_utils.pair1())
    p = test_utils.erase_read2(p)
    p0 = p[0]
    # insert the pair into the context, twice
    self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
    self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
    self.__reducer.reduce(self.__ctx)
    self.assertEqual(1, len(self.__ctx.emitted.keys()))
    # only one SAM record associated with the key; list() keeps the
    # subscripts valid on Python 3 dict views
    self.assertEqual(1, len(list(self.__ctx.emitted.values())[0]))
    short_name = p0.get_name()[0:-2]
    self.assertEqual(short_name, list(self.__ctx.emitted.keys())[0])
    # raw string so \d and \s are not treated as invalid string escapes
    self.assertTrue(
        re.match(r"\d+\s+%s\s+%d\s+.*" % (p0.tid, p0.pos),
                 self.__ctx.emitted[short_name][0]))
    # check counter ('in' replaces dict.has_key, which Python 3 removed)
    self.assertNotIn(self.__pair_counter_name(), self.__ctx.counters)
    self.assertIn(self.__frag_counter_name(), self.__ctx.counters)
    self.assertEqual(1, self.__ctx.counters[self.__frag_counter_name()])
def test_fragment_with_duplicate_in_pair_1_no_discard(self):
    """Ensure the reducer catches a fragment duplicate of pair[0].

    With discard_duplicates disabled, the duplicate fragment must still be
    emitted, but flagged as a duplicate in its SAM flag field.
    """
    p = list(test_utils.pair1())
    self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
    p = test_utils.erase_read2(p)
    self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
    self.__reducer.discard_duplicates = False
    self.__reducer.reduce(self.__ctx)
    # now ensure that both were emitted, but the fragment is marked as duplicate
    self.__ensure_pair1_emitted()
    self.assertEqual(1, len(self.__ctx.emitted.keys()))
    # 3 SAM records associated with the key (two for the pair, one fragment);
    # list() keeps the subscript valid on Python 3 dict views
    emitted_records = list(self.__ctx.emitted.values())[0]
    self.assertEqual(3, len(emitted_records))
    # make sure we have a read with the duplicate flag set; raw string so
    # \d and \s are not treated as invalid string escapes
    regexp = r"(\d+)\s+.*"
    flags = [int(re.match(regexp, value).group(1)) for value in emitted_records]
    dup_flags = [flag for flag in flags if flag & sam_flags.SAM_FDP]
    self.assertEqual(1, len(dup_flags))
    f = dup_flags[0]
    self.assertTrue(f & sam_flags.SAM_FR1 > 0)   # ensure the duplicate read is r1
    self.assertTrue(f & sam_flags.SAM_FPD == 0)  # ensure the duplicate read is unpaired
    # check counter ('in' replaces dict.has_key, which Python 3 removed)
    self.assertNotIn(self.__pair_counter_name(), self.__ctx.counters)
    self.assertIn(self.__frag_counter_name(), self.__ctx.counters)
    self.assertEqual(1, self.__ctx.counters[self.__frag_counter_name()])
def test_duplicate_fragments_read1_no_discard(self):
    """Two identical unpaired reads: with discard disabled, both are emitted
    and exactly one carries the SAM duplicate flag.
    """
    # load pair 1 and erase its second read
    p = list(test_utils.pair1())
    p = test_utils.erase_read2(p)
    p0 = p[0]
    # insert the pair into the context, twice
    self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
    self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
    self.__reducer.discard_duplicates = False
    self.__reducer.reduce(self.__ctx)
    self.assertEqual(1, len(self.__ctx.emitted.keys()))
    # Two SAM records associated with the key; list() keeps the subscripts
    # valid on Python 3 dict views
    emitted_records = list(self.__ctx.emitted.values())[0]
    self.assertEqual(2, len(emitted_records))
    short_name = p0.get_name()[0:-2]
    self.assertEqual(short_name, list(self.__ctx.emitted.keys())[0])
    # leading integer of each record is its SAM flag field; comprehensions
    # replace map/filter so len() works on Python 3, and group(1) replaces
    # the obscure int(*m.groups(1)) spelling
    flags = [int(re.match(r"(\d+).*", sam).group(1)) for sam in emitted_records]
    # ensure we have one marked as duplicate
    self.assertEqual(1, len([flag for flag in flags if flag & sam_flags.SAM_FDP]))
    # and ensure we have one NOT marked as duplicate
    self.assertEqual(1, len([flag for flag in flags if flag & sam_flags.SAM_FDP == 0]))
    # check counter ('in' replaces dict.has_key, which Python 3 removed)
    self.assertNotIn(self.__pair_counter_name(), self.__ctx.counters)
    self.assertIn(self.__frag_counter_name(), self.__ctx.counters)
    self.assertEqual(1, self.__ctx.counters[self.__frag_counter_name()])