示例#1
0
    def test_18_filter_reads(self):
        """
        Check read parsing, read-end intersection and read filtering.

        For each alignment flavour ('map' and 'sam') the two read-end files
        are parsed, intersected, and passed to filter_reads; the number of
        reads reported for each filter is then compared with the expected
        values. Afterwards filter 1 is applied (reversed) to the 'map'
        intersection and the iterative-mapping summary is checked.

        The test returns silently when ONLY selects another test, and the
        'sam' flavour is skipped when the SAM parser cannot be imported.
        """
        if ONLY and ONLY != '18':
            return
        if CHKTIME:
            t0 = time()
        # loop-invariant import, done once instead of at every iteration
        from pytadbit.mapping import get_intersection
        for ali in ['map', 'sam']:
            seed(1)
            # exact read counts are only asserted when the seeded random
            # generator yields the value the reference numbers rely on
            same_seed = 13436 == int(random()*100000)
            if same_seed:
                genome = generate_random_ali(ali)
                genome_bis = parse_fasta('test.fa~', verbose=False)
                self.assertEqual(genome, genome_bis)
            else:
                genome = parse_fasta('test.fa~')
            # PARSE SAM
            if ali == 'map':
                from pytadbit.parsers.map_parser import parse_map as parser
            else:
                try:
                    from pytadbit.parsers.sam_parser import parse_sam as parser
                except ImportError:
                    # parenthesised single-argument form: same output on
                    # Python 2, and also valid Python 3 syntax
                    print('ERROR: PYSAM not found, skipping test\n')
                    continue

            parser(['test_read1.%s~' % (ali)], ['test_read2.%s~' % (ali)],
                   './lala1-%s~' % (ali), './lala2-%s~' % (ali), genome,
                   re_name='DPNII', mapper='GEM')

            # GET INTERSECTION
            get_intersection('lala1-%s~' % (ali), 'lala2-%s~' % (ali),
                             'lala-%s~' % (ali))
            # FILTER
            masked = filter_reads('lala-%s~' % (ali), verbose=False,
                                  fast=(ali=='map'))
            for filter_key in (1, 2, 3, 4):
                self.assertEqual(masked[filter_key]['reads'], 1000)
            if same_seed:
                self.assertEqual(masked[5]['reads'], 1110)
                self.assertEqual(masked[6]['reads'], 2332)
                self.assertEqual(masked[7]['reads'], 0)
                self.assertEqual(masked[8]['reads'], 141)
                self.assertEqual(masked[10]['reads'], 1)
            else:
                self.assertGreater(masked[5]['reads'], 1000)
            self.assertEqual(masked[9]['reads'], 1000)
        # NOTE(review): 'masked' is the result of the last flavour that was
        # actually processed (the 'sam' one unless it was skipped), yet it is
        # applied to the 'map' intersection -- confirm this is intended.
        apply_filter('lala-map~', 'lala-map-filt~', masked, filters=[1],
                     reverse=True, verbose=False)
        # count non-header lines; the context manager closes the file
        with open('lala-map-filt~') as filtered:
            kept = sum(1 for line in filtered if not line.startswith('#'))
        self.assertEqual(kept, 1000)
        d = plot_iterative_mapping('lala1-map~', 'lala2-map~')
        self.assertEqual(d[0][1], 6000)

        if CHKTIME:
            print('18 %s' % (time() - t0))
示例#2
0
    def test_18_filter_reads(self):
        """
        Test the parse / intersect / filter chain on mapped reads.

        Runs once per alignment flavour ('map', then 'sam'): both read-end
        files are parsed, their intersection is computed, and the per-filter
        read counts returned by filter_reads are compared with the expected
        numbers. Finally filter 1 is applied in reverse mode to the 'map'
        intersection, and the result of plot_iterative_mapping is checked.

        Nothing is tested when ONLY points to a different test; the 'sam'
        pass is skipped when its parser cannot be imported.
        """
        if ONLY and ONLY != '18':
            return
        if CHKTIME:
            t0 = time()
        # imported once here rather than at each loop iteration
        from pytadbit.mapping import get_intersection
        for ali in ['map', 'sam']:
            seed(1)
            # the exact reference counts below are only valid when the random
            # generator gives this value right after seeding
            same_seed = int(random()*100000) == 13436
            if same_seed:
                genome = generate_random_ali(ali)
                genome_bis = parse_fasta('test.fa~', verbose=False)
                self.assertEqual(genome, genome_bis)
            else:
                genome = parse_fasta('test.fa~')
            # PARSE SAM
            if ali == 'map':
                from pytadbit.parsers.map_parser import parse_map as parser
            else:
                try:
                    from pytadbit.parsers.sam_parser import parse_sam as parser
                except ImportError:
                    # written so that it parses on Python 2 and Python 3
                    print('ERROR: PYSAM not found, skipping test\n')
                    continue

            parser(['test_read1.%s~' % (ali)], ['test_read2.%s~' % (ali)],
                   './lala1-%s~' % (ali), './lala2-%s~' % (ali), genome,
                   re_name='DPNII', mapper='GEM')

            # GET INTERSECTION
            get_intersection('lala1-%s~' % (ali), 'lala2-%s~' % (ali),
                             'lala-%s~' % (ali))
            # FILTER
            masked = filter_reads('lala-%s~' % (ali), verbose=False,
                                  fast=(ali=='map'))
            self.assertEqual(masked[1]['reads'], 1000)
            self.assertEqual(masked[2]['reads'], 1000)
            self.assertEqual(masked[3]['reads'], 1000)
            self.assertEqual(masked[4]['reads'], 1000)
            if same_seed:
                expected = {5: 1110, 6: 2332, 7: 0, 8: 141, 10: 1}
                for filter_key in sorted(expected):
                    self.assertEqual(masked[filter_key]['reads'],
                                     expected[filter_key])
            else:
                self.assertGreater(masked[5]['reads'], 1000)
            self.assertEqual(masked[9]['reads'], 1000)
        # NOTE(review): 'masked' comes from the last flavour processed in the
        # loop, not necessarily from 'map' -- verify this is what is wanted.
        apply_filter('lala-map~', 'lala-map-filt~', masked, filters=[1],
                     reverse=True, verbose=False)
        # the file is closed as soon as its data lines have been counted
        with open('lala-map-filt~') as handle:
            nlines = sum(1 for line in handle if not line.startswith('#'))
        self.assertEqual(nlines, 1000)
        d = plot_iterative_mapping('lala1-map~', 'lala2-map~')
        self.assertEqual(d[0][1], 6000)

        if CHKTIME:
            print('18 %s' % (time() - t0))
示例#3
0
    def test_18_filter_reads(self):
        """
        Check read parsing, read-end intersection and read filtering.

        The "map" and "sam" alignment flavours are processed in turn: the two
        read-end files are parsed, intersected, and given to filter_reads,
        whose per-filter read counts are compared with the expected values.
        Filter 1 is then applied (reversed) to the "map" intersection and the
        output of plot_iterative_mapping is checked.

        The test is a no-op when ONLY is set and does not contain "18"; the
        "sam" flavour is skipped when its parser cannot be imported.
        """
        if ONLY and "18" not in ONLY:
            return
        if CHKTIME:
            t0 = time()
        # loop-invariant import, performed a single time
        from pytadbit.mapping import get_intersection
        for ali in ["map", "sam"]:
            seed(1)
            # exact counts are only checked when the seeded generator
            # reproduces the value the reference numbers were computed with
            same_seed = 13436 == int(random()*100000)
            if same_seed:
                genome = generate_random_ali(ali)
                genome_bis = parse_fasta("test.fa~", verbose=False)
                self.assertEqual(genome, genome_bis)
            else:
                genome = parse_fasta("test.fa~")
            # PARSE SAM
            if ali == "map":
                from pytadbit.parsers.map_parser import parse_map as parser
            else:
                try:
                    from pytadbit.parsers.sam_parser import parse_sam as parser
                except ImportError:
                    # single parenthesised argument: identical output on
                    # Python 2, valid syntax on Python 3
                    print("ERROR: PYSAM not found, skipping test\n")
                    continue

            parser(["test_read1.%s~" % (ali)], ["test_read2.%s~" % (ali)],
                   "./lala1-%s~" % (ali), "./lala2-%s~" % (ali), genome,
                   re_name="DPNII", mapper="GEM")

            # GET INTERSECTION
            get_intersection("lala1-%s~" % (ali), "lala2-%s~" % (ali),
                             "lala-%s~" % (ali))
            # FILTER
            masked = filter_reads("lala-%s~" % (ali), verbose=False,
                                  fast=(ali=="map"))
            for filter_key in (1, 2, 3, 4):
                self.assertEqual(masked[filter_key]["reads"], 1000)
            if same_seed:
                self.assertEqual(masked[5]["reads"], 1110)
                self.assertEqual(masked[6]["reads"], 2332)
                self.assertEqual(masked[7]["reads"], 0)
                self.assertEqual(masked[8]["reads"], 141)
                self.assertEqual(masked[10]["reads"], 1)
            else:
                self.assertGreater(masked[5]["reads"], 1000)
            self.assertEqual(masked[9]["reads"], 1000)
        # NOTE(review): "masked" is whatever the last processed flavour
        # produced, while the filter is applied to the "map" files -- confirm.
        apply_filter("lala-map~", "lala-map-filt~", masked, filters=[1],
                     reverse=True, verbose=False)
        # count the non-header lines and make sure the file gets closed
        with open("lala-map-filt~") as filtered:
            kept = sum(1 for line in filtered if not line.startswith("#"))
        self.assertEqual(kept, 1000)
        d = plot_iterative_mapping("lala1-map~", "lala2-map~")
        self.assertEqual(d[0][1], 6000)

        if CHKTIME:
            print("18 %s" % (time() - t0))
示例#4
0
    def test_18_filter_reads(self):
        """
        Test the parse / intersect / filter chain on mapped reads.

        For each alignment flavour ("map", then "sam") the two read-end files
        are parsed, their intersection is written, and the per-filter read
        counts returned by filter_reads are checked. Filter 1 is then applied
        in reverse mode to the "map" intersection, and the first entry of the
        plot_iterative_mapping result is verified.

        Returns without testing when ONLY selects another test; the "sam"
        pass is skipped when its parser cannot be imported.
        """
        if ONLY and ONLY != "18":
            return
        if CHKTIME:
            t0 = time()
        # imported once up front instead of on every loop iteration
        from pytadbit.mapping import get_intersection

        for ali in ["map", "sam"]:
            seed(1)
            # the hard-coded counts below only hold when the random generator
            # returns this value right after being seeded
            same_seed = 13436 == int(random() * 100000)
            if same_seed:
                genome = generate_random_ali(ali)
                genome_bis = parse_fasta("test.fa~", verbose=False)
                self.assertEqual(genome, genome_bis)
            else:
                genome = parse_fasta("test.fa~")
            # PARSE SAM
            if ali == "map":
                from pytadbit.parsers.map_parser import parse_map as parser
            else:
                try:
                    from pytadbit.parsers.sam_parser import parse_sam as parser
                except ImportError:
                    # same output on Python 2, and parses on Python 3 too
                    print("ERROR: PYSAM not found, skipping test\n")
                    continue

            parser(
                ["test_read1.%s~" % (ali)],
                ["test_read2.%s~" % (ali)],
                "./lala1-%s~" % (ali),
                "./lala2-%s~" % (ali),
                genome,
                re_name="DPNII",
                mapper="GEM",
            )

            # GET INTERSECTION
            get_intersection("lala1-%s~" % (ali), "lala2-%s~" % (ali), "lala-%s~" % (ali))
            # FILTER
            masked = filter_reads("lala-%s~" % (ali), verbose=False, fast=(ali == "map"))
            for filter_key in (1, 2, 3, 4):
                self.assertEqual(masked[filter_key]["reads"], 1000)
            if same_seed:
                self.assertEqual(masked[5]["reads"], 1110)
                self.assertEqual(masked[6]["reads"], 2332)
                self.assertEqual(masked[7]["reads"], 0)
                self.assertEqual(masked[8]["reads"], 141)
                self.assertEqual(masked[10]["reads"], 1)
            else:
                self.assertGreater(masked[5]["reads"], 1000)
            self.assertEqual(masked[9]["reads"], 1000)
        # NOTE(review): "masked" holds the result of the last flavour that was
        # processed, yet it filters the "map" intersection -- confirm intent.
        apply_filter("lala-map~", "lala-map-filt~", masked, filters=[1], reverse=True, verbose=False)
        # the context manager closes the file once its lines are counted
        with open("lala-map-filt~") as filtered:
            kept = sum(1 for line in filtered if not line.startswith("#"))
        self.assertEqual(kept, 1000)
        d = plot_iterative_mapping("lala1-map~", "lala2-map~")
        self.assertEqual(d[0][1], 6000)

        if CHKTIME:
            print("18 %s" % (time() - t0))
示例#5
0
# Matplotlib defaults set globally through plt.rcParams; they stay in effect
# for everything plotted after this point.
plt.rcParams['font.weight'] = 'medium'
#plt.rcParams['font.family'] = 'sans-serif'
#plt.rcParams['font.sans-serif'] = 'Arial'
plt.rcParams['lines.linewidth'] = 2.0
plt.rcParams['legend.numpoints'] = 1
plt.rcParams['legend.frameon'] = False
plt.rcParams['savefig.bbox'] = 'tight'

# Plot: fraction of mapped reads
# maps1 / maps2 are defined outside this section -- presumably the mapped-read
# files of read-end 1 and read-end 2; verify where they are assigned.
infiles = [maps1, maps2]
# Pair identifier: file name of the first input (last "/"-separated
# component) with "_read1_map.tsv" removed.
pair_id = infiles[0].split("/")[-1].replace("_read1_map.tsv", "")
# Path passed as `savefig` to plot_iterative_mapping below.
outfile = '%s/%s_plot_proportion_mapped_reads.png' % (POSTMAPPING_PLOTS,
                                                      pair_id)
reads_mapped_per_iteration = plot_iterative_mapping(
    fnam1=infiles[0],
    fnam2=infiles[1],
    total_reads=n_reads_trimmed,
    savefig=outfile)
# Turn the returned mapping into a two-column table, one column per read-end.
# NOTE(review): the renaming assumes exactly two columns, ordered read-end 1
# then read-end 2 -- confirm against plot_iterative_mapping's return value.
reads_mapped_per_iteration = pd.DataFrame.from_dict(reads_mapped_per_iteration)
reads_mapped_per_iteration.columns = ['read1', 'read2']
# Fraction mapped = last value of each column divided by the number of
# trimmed reads (presumably the last row is the final mapping iteration --
# TODO confirm the row order).
fraction_mapped_read1 = list(
    reads_mapped_per_iteration['read1'])[-1] / float(n_reads_trimmed)
fraction_mapped_read2 = list(
    reads_mapped_per_iteration['read2'])[-1] / float(n_reads_trimmed)
# Both fractions as one comma-separated string: "<read1>,<read2>".
fraction_mapped_str = ",".join(
    [str(i) for i in [fraction_mapped_read1, fraction_mapped_read2]])

# Plot: distribution of dangling-end lengths
plt.rcParams['font.size'] = 12
infile = '%s/%s_both_map.tsv' % (PROCESSED, pair_id)
outfile = '%s/%s_plot_distribution_dangling_ends_lengths.png' % (