示例#1
0
def draw_me_something_nice(infile, outfile, outfile2):
    """Draw linear and circular genome diagrams for one EMBL record.

    Every feature of the record except ``source`` and ``gene`` features is
    drawn twice on a single track: once as a pale box spanning the whole
    feature (via ``squash_exons``) and once, on top, with its own sigil.
    Features whose ``locus_tag`` matches one of the hard-coded effector or
    SPRYSEC gene IDs are recoloured (red/pink for effectors, blue/lightblue
    for SPRYSEC; SPRYSEC wins when both match) and reported.

    Args:
        infile: path to an EMBL file holding a single record.
        outfile: path of the linear diagram PDF.
        outfile2: path of the circular diagram PDF.

    Side effects:
        * writes ``<infile without .embl>effecotr_info.txt`` with one
          tab-separated line per effector / SPRYSEC hit and echoes the same
          lines to stdout;
        * writes ``GROS_linear.svg`` and ``GROS_circ.svg`` in the current
          working directory.

    Relies on ``add_jaggies`` and ``squash_exons`` being defined elsewhere
    in this module.
    """
    # Read through a context manager so the input handle is closed promptly.
    with open(infile) as embl_handle:
        genbank_entry = SeqIO.read(embl_handle, "embl")
    # NOTE: the "effecotr" misspelling is kept on purpose so that the name of
    # the report file does not change for existing users of this script.
    name_for_info_out = infile.split(".embl")[0] + "effecotr_info.txt"

    gdd = Diagram('Test Diagram')
    # Add a track of features,
    gdt_features = gdd.new_track(
        1,
        greytrack=True,
        name="CDS Features",
        scale_largetick_interval=100000,
        scale_smalltick_interval=5000,
        scale_fontsize=3,
        scale_format="SInt",
        greytrack_labels=False,  # e.g. 5
        height=0.75)

    # We'll just use one feature set for these features,
    gds_features = gdt_features.new_set()

    add_jaggies(str(genbank_entry.seq), 0, gds_features)

    # genes of interest (first line is a column header, skipped below)
    effectors = """#gene
GPALN001111
GPALN001252
GPALN001912
GPALN002106
GPALN002290
GPALN002295
GPALN002300
GPALN002383
GPALN002386
GPALN002387
GPALN002593
GPALN002947
GPALN003010
GPALN003306
GPALN003381
GPALN003415
GPALN003793
GPALN003794
GPALN003795
GPALN003831
GPALN003952
GPALN003970
GPALN003975
GPALN004254
GPALN004470
GPALN004493
GPALN004587
GPALN004712
GPALN004734
GPALN004862
GPALN004897
GPALN005042
GPALN005067
GPALN005090
GPALN005100
GPALN005105
GPALN005801
GPALN005901
GPALN005903
GPALN005905
GPALN005953
GPALN006035
GPALN006057
GPALN006059
GPALN006061
GPALN006067
GPALN006752
GPALN006754
GPALN006755
GPALN006756
GPALN006759
GPALN006766
GPALN006769
GPALN006775
GPALN006818
GPALN006828
GPALN006839
GPALN006853
GPALN006856
GPALN006945
GPALN007181
GPALN007436
GPALN007445
GPALN007670
GPALN007708
GPALN007837
GPALN008101
GPALN009056
GPALN009444
GPALN009458
GPALN009497
GPALN009498
GPALN009532
GPALN009580
GPALN009586
GPALN009589
GPALN009796
GPALN009815
GPALN009825
GPALN009837
GPALN009900
GPALN009918
GPALN010093
GPALN010199
GPALN010232
GPALN010540
GPALN010542
GPALN010554
GPALN010603
GPALN010659
GPALN010702
GPALN010737
GPALN010789
GPALN010793
GPALN010968
GPALN010970
GPALN011399
GPALN011823
GPALN011858
GPALN011865
GPALN012007
GPALN012056
GPALN012064
GPALN012287
GPALN013168
GPALN013277
GPALN013280
GPALN013347
GPALN013348
GPALN013350
GPALN013383
GPALN013384
GPALN013459
GPALN013480
GPALN013496
GPALN014034
GPALN014145
GPALN014146
GPALN014235
GPALN014268
GPALN014324
GPALN014327
GPALN014354
GPALN014355
GPALN014357
GPALN014378
GPALN014379
GPALN014381
GPALN014395
GPALN014477
GPALN014498
GPALN014747
GPALN014750
GPALN014881
GPALN015014
GPALN015073
GPALN015211
GPALN015248
GPALN015280
GPALN015295
GPALN015296
GPALN015298
GPALN015299
GPALN015301
GPALN015302
GPALN015304
GPALN015309
GPALN015425
GPALN015632
GPALN016298
GPALN016343
GPALN016360
GPALN016380
GPALN002204
GPALN002370
GPALN002666
GPALN002991
GPALN003997
GPALN004009
GPALN005554
GPALN007648
GPALN012415
GPALN013387
GPALN013545
GPALN014713
GPALN015272
GPALN015314
GPALN015605
GPALN015654
GPALN002028
GPALN002288
GPALN002969
GPALN003083
GPALN003416
GPALN003852
GPALN004011
GPALN004265
GPALN004480
GPALN004881
GPALN004901
GPALN005017
GPALN005038
GPALN006124
GPALN007079
GPALN007178
GPALN009323
GPALN010231
GPALN010636
GPALN011715
GPALN011857
GPALN012062
GPALN014261
GPALN014271
GPALN014665
GPALN014857
GPALN015013
GPALN015100
GPALN015116
GPALN015172
GPALN015218
GPALN000707
GPALN001149
GPALN001153
GPALN001281
GPALN001284
GPALN001315
GPALN001641
GPALN001729
GPALN001738
GPALN001745
GPALN002294
GPALN002346
GPALN002349
GPALN002377
GPALN002379
GPALN002466
GPALN002494
GPALN003077
GPALN003222
GPALN003326
GPALN003368
GPALN003369
GPALN003846
GPALN003860
GPALN003876
GPALN003882
GPALN003891
GPALN003905
GPALN003908
GPALN003910
GPALN003911
GPALN003912
GPALN003913
GPALN003925
GPALN003942
GPALN003943
GPALN003946
GPALN003949
GPALN003953
GPALN003954
GPALN003955
GPALN003977
GPALN003990
GPALN004007
GPALN004008
GPALN004010
GPALN004014
GPALN004017
GPALN004018
GPALN004064
GPALN004130
GPALN004342
GPALN004369
GPALN004380
GPALN004410
GPALN004411
GPALN004506
GPALN004534
GPALN004553
GPALN004554
GPALN004555
GPALN004557
GPALN004678
GPALN004679
GPALN004681
GPALN004798
GPALN005064
GPALN005129
GPALN005160
GPALN005161
GPALN005611
GPALN005738
GPALN005986
GPALN006031
GPALN006038
GPALN006112
GPALN006223
GPALN006413
GPALN006581
GPALN006596
GPALN006778
GPALN006780
GPALN006782
GPALN006860
GPALN006911
GPALN007051
GPALN007058
GPALN007072
GPALN007120
GPALN007129
GPALN007132
GPALN007139
GPALN007179
GPALN007201
GPALN007443
GPALN007647
GPALN007696
GPALN007748
GPALN007796
GPALN007811
GPALN007848
GPALN007899
GPALN008074
GPALN008097
GPALN008098
GPALN008100
GPALN008102
GPALN008108
GPALN008152
GPALN008161
GPALN008462
GPALN008535
GPALN009441
GPALN009443
GPALN009492
GPALN009505
GPALN009640
GPALN009669
GPALN009670
GPALN009695
GPALN009823
GPALN009839
GPALN009902
GPALN010067
GPALN010126
GPALN010127
GPALN010171
GPALN010316
GPALN010321
GPALN010414
GPALN010416
GPALN010432
GPALN010433
GPALN010511
GPALN010519
GPALN010534
GPALN010536
GPALN010598
GPALN010602
GPALN010621
GPALN010625
GPALN010778
GPALN010795
GPALN010824
GPALN011797
GPALN011812
GPALN011852
GPALN012010
GPALN012025
GPALN012067
GPALN012099
GPALN012127
GPALN012134
GPALN012284
GPALN012357
GPALN012358
GPALN012366
GPALN012838
GPALN013104
GPALN013109
GPALN013144
GPALN013150
GPALN013385
GPALN014005
GPALN014368
GPALN014369
GPALN014370
GPALN014371
GPALN014372
GPALN014377
GPALN014397
GPALN014398
GPALN014514
GPALN014539
GPALN014576
GPALN014672
GPALN014707
GPALN014746
GPALN014851
GPALN014865
GPALN014866
GPALN014867
GPALN014868
GPALN014885
GPALN014904
GPALN015061
GPALN015177
GPALN015178
GPALN015179
GPALN015181
GPALN015182
GPALN015183
GPALN015186
GPALN015188
GPALN015193
GPALN015243
GPALN015262
GPALN015279
GPALN015285
GPALN015291
GPALN015297
GPALN015738
GPALN015769
GPALN016090
GPALN016091
GPALN016117
GPALN016181
GPALN016188
GPALN016330
GPALN016378""".split("\n")

    SPRYSEC = """GPALN012056.T1
GPALN009532.T1
GPALN003794.T1
GPALN014357.T1
GPALN010968.T1
GPALN001352.T1
GPALN006035.T1
GPALN007139.T1
GPALN013168.T1
GPALN006853.T1
GPALN010970.T1
GPALN014477.T1
GPALN015302.T1
GPALN012007.T1
GPALN015309.T1
GPALN010793.T1
GPALN006818.T1
GPALN013114.T1
GPALN006860.T1
GPALN009815.T1
GPALN006839.T1
GPALN006856.T1
GPALN004734.T1
GPALN006596.T1
GPALN013383.T1
GPALN011823.T1
GPALN012287.T1
GPALN009918.T1
GPALN014398.T1
GPALN010231.T1
GPALN009669.T1
GPALN010232.T1
GPALN013348.T1
GPALN013350.T1
GPALN010645.T1
GPALN010093.T1
GPALN014397.T1
GPALN002288.T1
GPALN002300.T1
GPALN011858.T1
GPALN015298.T1
GPALN013480.T1
GPALN009458.T1
GPALN010789.T1
GPALN007168.T1
GPALN008646.T1
GPALN006775.T1
GPALN015295.T1
GPALN004897.T1
GPALN002290.T1
GPALN015013.T1
GPALN014271.T1
GPALN015632.T1
GPALN015301.T1
GPALN014355.T1
GPALN007445.T1
GPALN015280.T1
GPALN007711.T1
GPALN015314.T1
GPALN010569.T1
GPALN007132.T1
GPALN006828.T1
GPALN004881.T1
GPALN007129.T1
GPALN013385.T1
GPALN003057.T1
GPALN015407.T1
GPALN004265.T1
GPALN014395.T1
GPALN012062.T1
GPALN001780.T1
GPALN012064.T1
GPALN007120.T1
GPALN005953.T1
GPALN003793.T1
GPALN015813.T1
GPALN016040.T1""".split("\n")

    # Build the effector transcript names (gene ID + ".T1"), skipping blank
    # lines and the "#gene" header, and keeping each name only once.  The
    # membership test is done on the suffixed name so duplicates are
    # actually filtered.
    seen_transcripts = set()
    effector_list = []
    for raw_gene in effectors:
        gene_id = raw_gene.strip()
        if not gene_id or gene_id.startswith("#"):
            continue
        transcript = gene_id + ".T1"
        if transcript not in seen_transcripts:
            seen_transcripts.add(transcript)
            effector_list.append(transcript)

    sprysec_list = [entry.strip() for entry in SPRYSEC if entry.strip()]

    with open(name_for_info_out, "w") as f_general_output:
        # ``count`` numbers every feature (including skipped ones) and is
        # only used to alternate the CDS colours.
        for count, feature in enumerate(genbank_entry.features, start=1):
            if feature.type in ("source", "gene"):
                # Ignore these (the CDS is enough to represent a gene).
                continue

            shape = "ARROW"
            color2 = "grey"
            if feature.type == "tRNA":
                color = "red"
            elif feature.type == "rRNA":
                color = "purple"
            elif feature.type == "gap":
                color = "grey"
                shape = "JAGGY"
                feature.strand = None  # i.e. draw it strandless
            elif feature.type != "CDS":
                color = "lightgreen"
            elif count % 2 == 0:
                # Semi-transparent blue / light blue.
                color = colors.Color(0, 0, 1, 0.4)
                color2 = colors.Color(.678431, .847059, .901961, 0.2)
            else:
                # Semi-transparent green / light green.
                color = colors.Color(0, 0.501961, 0, 0.4)
                color2 = colors.Color(0.564706, 0.933333, 0.564706, 0.2)

            # Resolve the locus tag once per feature.  Features without a
            # locus_tag (or with an empty one) are never highlighted: a
            # missing tag used to crash, and an empty one would match every
            # gene because the comparison is a substring test.
            tags = feature.qualifiers.get("locus_tag") or [None]
            locus_tag = (tags[0] or "").replace(";", "")

            if locus_tag:
                for gene_name in effector_list:
                    if locus_tag in gene_name:
                        color = "red"
                        color2 = "pink"
                        f_general_output.write("effector\t%s\t%s\n" %
                                               (infile, gene_name))
                        print("effector\t%s\t%s\n" % (infile, gene_name))

                # SPRYSEC is checked last so it overrides the effector
                # colours when a gene is in both lists.
                for gene_name in sprysec_list:
                    if locus_tag in gene_name:
                        color = "blue"
                        color2 = "lightblue"
                        f_general_output.write("SPRYSEC\t%s\t%s\n" %
                                               (infile, gene_name))
                        print("SPRYSEC\t%s\t%s\n" % (infile, gene_name))

            # Pale background box covering the whole (squashed) feature.
            gds_features.add_feature(
                squash_exons(feature),
                color=color2,
                sigil="BOX",
                arrowshaft_height=0.8,
                arrowhead_length=0.5,
                label_position="start",
                label_size=1,
                label_angle=90,
                label=True)
            # Don't want the line round the feature as starts to overlap
            gds_features.add_feature(feature,
                                     border=False,
                                     color=color,
                                     sigil=shape,
                                     arrowshaft_height=0.6,
                                     arrowhead_length=0.5,
                                     label_position="start",
                                     label_size=1,
                                     label_angle=90,
                                     label=False)

    # And draw it...
    gdd.draw(format='linear',
             orientation='landscape',
             tracklines=False,
             pagesize='A4',
             fragments=10)
    gdd.write(outfile, 'PDF')
    gdd.write("GROS_linear.svg", 'SVG')

    # And a circular version
    # Change the order and leave an empty space in the center:
    gdd.move_track(1, 3)
    gdd.draw(format='circular', tracklines=False, pagesize=(30 * cm, 30 * cm))
    gdd.write(outfile2, 'PDF')
    gdd.write("GROS_circ.svg", 'SVG')
示例#2
0
# Bail out as soon as one of the three expected input files is absent.
for f in (file_a, file_b, file_a_vs_b):
    if os.path.isfile(os.path.join(input_folder, f)):
        continue
    print("Missing input file %s.fna" % f)
    sys.exit(1)

# Each genome is a (path, format) pair.  Only the a_vs_b comparison is set
# up here; b_vs_c, c_vs_d etc. could be appended in the same way.
genomes = []
genomes.append((os.path.join(input_folder, file_a), format_a))
genomes.append((os.path.join(input_folder, file_b), format_b))
comparisons = [os.path.join(input_folder, file_a_vs_b)]

# There must be exactly one comparison between each pair of adjacent genomes.
assert len(genomes) >= 2
assert len(genomes) == len(comparisons) + 1

# One diagram; per genome (keyed by file path) one record, one track and
# one feature set on that track.
gd_diagram = Diagram(name, track_size=0.35, circular=False)
tracks = {}
feature_sets = {}
records = {}
# The loop names ``f`` and ``format`` are left unchanged because they are
# module-level and code further down may still refer to them.
for f, format in genomes:
    records[f] = SeqIO.read(f, format)
    tracks[f] = gd_diagram.new_track(
        1,
        name=f,
        start=0,
        end=len(records[f]),
        scale_smalltick_interval=1000,
        scale_largetick_interval=10000,
        greytrack=True,
        greytrack_labels=0,
    )
    feature_sets[f] = tracks[f].new_set()
示例#3
0
    def test_diagram_via_methods_pdf(self):
        """Construct and draw PDF using method approach.

        Builds a feature track (gene features plus strandless restriction
        site markers) and a GC/AT content graph track on ``self.record``,
        then writes a linear and a circular PDF into ``Graphics``.
        """
        genbank_entry = self.record
        gdd = Diagram('Test Diagram')

        # Add a track of features,
        gdt_features = gdd.new_track(1, greytrack=True,
                                     name="CDS Features", greytrack_labels=0,
                                     height=0.5)
        # We'll just use one feature set for the genes and misc_features,
        gds_features = gdt_features.new_set()
        for feature in genbank_entry.features:
            if feature.type == "gene":
                # Alternate the colour based on how many features were added.
                if len(gds_features) % 2 == 0:
                    color = "blue"
                else:
                    color = "lightblue"
                gds_features.add_feature(feature, color=color,
                                         label_position="start",
                                         label_size=11,
                                         sigil="ARROW",
                                         label=True)

        # I want to include some strandless features, so for an example
        # will use EcoRI recognition sites etc.
        for site, name, color in [("GAATTC", "EcoRI", "green"),
                                  ("CCCGGG", "SmaI", "orange"),
                                  ("AAGCTT", "HindIII", "red"),
                                  ("GGATCC", "BamHI", "purple")]:
            index = 0
            while True:
                index = genbank_entry.seq.find(site, start=index)
                if index == -1:
                    break
                # Span exactly the recognition site rather than a fixed
                # width, matching the len(site) step used below.
                feature = SeqFeature(FeatureLocation(index, index + len(site)),
                                     strand=None)
                gds_features.add_feature(feature, color=color,
                                         label_size=10,
                                         label_color=color,
                                         name=name,
                                         label=True)
                index += len(site)
            del index

        # Now add a graph track...
        gdt_at_gc = gdd.new_track(2, greytrack=True,
                                  name="AT and GC content",
                                  greytrack_labels=True)
        gds_at_gc = gdt_at_gc.new_set(type="graph")

        # Floor division keeps the window/step an integer on Python 3 too.
        step = len(genbank_entry) // 200
        gds_at_gc.new_graph(
            apply_to_window(genbank_entry.seq, step, calc_gc_content, step),
            'GC content', style='line',
            color=colors.lightgreen,
            altcolor=colors.darkseagreen)
        gds_at_gc.new_graph(
            apply_to_window(genbank_entry.seq, step, calc_at_content, step),
            'AT content', style='line',
            color=colors.orange,
            altcolor=colors.red)

        # Finally draw it in both formats,
        gdd.draw(format='linear', orientation='landscape',
                 tracklines=0, pagesize='A4', fragments=3)
        output_filename = os.path.join('Graphics', 'GD_by_meth_linear.pdf')
        gdd.write(output_filename, 'PDF')

        # Change the order and leave an empty space in the center:
        gdd.move_track(1, 3)

        gdd.draw(format='circular', tracklines=False,
                 pagesize=(20*cm, 20*cm), circular=True)
        output_filename = os.path.join('Graphics', 'GD_by_meth_circular.pdf')
        gdd.write(output_filename, 'PDF')
示例#4
0
    def test_diagram_via_object_pdf(self):
        """Construct and draw PDF using object approach.

        Feature sets, graph sets and tracks are created as stand-alone
        objects, wired together, added to the diagram at explicit track
        levels, and rendered to a circular and a linear PDF in ``Graphics``.
        """
        genbank_entry = self.record
        gdd = Diagram('Test Diagram')

        # First add some feature sets:
        gdfs1 = FeatureSet(name='CDS features')
        gdfs2 = FeatureSet(name='gene features')
        gdfs3 = FeatureSet(name='misc_features')
        gdfs4 = FeatureSet(name='repeat regions')

        cds_count = 0
        for feature in genbank_entry.features:
            if feature.type == 'CDS':
                # Alternate pink/red so neighbouring CDS are distinguishable.
                cds_count += 1
                if cds_count % 2 == 0:
                    gdfs1.add_feature(feature, color=colors.pink)
                else:
                    gdfs1.add_feature(feature, color=colors.red)

            if feature.type == 'gene':
                gdfs2.add_feature(feature)

            if feature.type == 'misc_feature':
                gdfs3.add_feature(feature, color=colors.orange)

            if feature.type == 'repeat_region':
                gdfs4.add_feature(feature, color=colors.purple)

        gdfs1.set_all_features('label', 1)
        gdfs2.set_all_features('label', 1)
        gdfs3.set_all_features('label', 1)
        gdfs4.set_all_features('label', 1)

        gdfs3.set_all_features('hide', 0)
        gdfs4.set_all_features('hide', 0)

        gdfs2.set_all_features('color', colors.blue)

        gdt1 = Track('CDS features', greytrack=True,
                     scale_largetick_interval=1e4,
                     scale_smalltick_interval=1e3,
                     greytrack_labels=10,
                     greytrack_font_color="red",
                     scale_format="SInt")
        gdt1.add_set(gdfs1)

        gdt2 = Track('gene features', greytrack=1,
                     scale_largetick_interval=1e4)
        gdt2.add_set(gdfs2)

        gdt3 = Track('misc features and repeats', greytrack=1,
                     scale_largetick_interval=1e4)
        gdt3.add_set(gdfs3)
        gdt3.add_set(gdfs4)

        # Now add some graph sets:

        # Use a fairly large step so we can easily tell the difference
        # between the bar and line graphs.  Floor division keeps the step an
        # integer on Python 3 as well as Python 2.
        step = len(genbank_entry) // 200
        gdgs1 = GraphSet('GC skew')

        graphdata1 = apply_to_window(genbank_entry.seq, step,
                                     calc_gc_skew, step)
        gdgs1.new_graph(graphdata1, 'GC Skew', style='bar',
                        color=colors.violet,
                        altcolor=colors.purple)

        gdt4 = Track('GC Skew (bar)',
                     height=1.94, greytrack=1,
                     scale_largetick_interval=1e4)
        gdt4.add_set(gdgs1)

        gdgs2 = GraphSet('GC and AT Content')
        gdgs2.new_graph(
            apply_to_window(genbank_entry.seq, step, calc_gc_content, step),
            'GC content', style='line',
            color=colors.lightgreen,
            altcolor=colors.darkseagreen)

        gdgs2.new_graph(
            apply_to_window(genbank_entry.seq, step, calc_at_content, step),
            'AT content', style='line',
            color=colors.orange,
            altcolor=colors.red)

        gdt5 = Track('GC Content(green line), AT Content(red line)',
                     height=1.94, greytrack=1,
                     scale_largetick_interval=1e4)
        gdt5.add_set(gdgs2)

        gdgs3 = GraphSet('Di-nucleotide count')
        step = len(genbank_entry) // 400  # smaller step
        gdgs3.new_graph(
            apply_to_window(genbank_entry.seq, step,
                            calc_dinucleotide_counts, step),
            'Di-nucleotide count', style='heat',
            color=colors.red, altcolor=colors.orange)
        gdt6 = Track('Di-nucleotide count', height=0.5,
                     greytrack=False, scale=False)
        gdt6.add_set(gdgs3)

        # Add the tracks (from both features and graphs)
        # Leave some white space in the middle
        gdd.add_track(gdt4, 3)  # GC skew
        gdd.add_track(gdt5, 4)  # GC and AT content
        gdd.add_track(gdt1, 5)  # CDS features
        gdd.add_track(gdt2, 6)  # Gene features
        gdd.add_track(gdt3, 7)  # Misc features and repeat feature
        gdd.add_track(gdt6, 8)  # Di-nucleotide count

        # Finally draw it in both formats,
        gdd.draw(format='circular', orientation='landscape',
                 tracklines=0, pagesize='A0', circular=True)
        output_filename = os.path.join('Graphics', 'GD_by_obj_circular.pdf')
        gdd.write(output_filename, 'PDF')

        gdd.draw(format='linear', orientation='landscape',
                 tracklines=0, pagesize='A0', fragments=3)
        output_filename = os.path.join('Graphics', 'GD_by_obj_linear.pdf')
        gdd.write(output_filename, 'PDF')
示例#5
0
 def setUp(self):
     """Create a fresh non-circular test diagram with 0.01 margins."""
     # The same small value is used for every margin keyword.
     margin = 0.01
     margins = dict(y=margin, yt=margin, yb=margin,
                    x=margin, xl=margin, xr=margin)
     self.gdd = Diagram('Test Diagram', circular=False, **margins)
示例#6
0
    def test_partial_diagram(self):
        """Construct and draw SVG and PDF for just part of a SeqRecord.

        Only CDS features overlapping the 6500-8750 window of
        ``self.record`` are drawn.  Linear and circular renderings are
        written as PDF and SVG into ``Graphics``, and the linear PDF on disk
        is compared with the output of ``write_to_string``.
        """
        genbank_entry = self.record
        start = 6500
        end = 8750

        gdd = Diagram('Test Diagram',
                      # For the circular diagram we don't want a closed
                      # circle:
                      circular=False,
                      )
        # Add a track of features,
        gdt_features = gdd.new_track(1, greytrack=True,
                                     name="CDS Features",
                                     scale_largetick_interval=1000,
                                     scale_smalltick_interval=100,
                                     scale_format="SInt",
                                     greytrack_labels=False,
                                     height=0.5)
        # We'll just use one feature set for these features,
        gds_features = gdt_features.new_set()
        for feature in genbank_entry.features:
            # "!=" instead of the Python-2-only "<>" operator.
            if feature.type != "CDS":
                # We're going to ignore these.
                continue
            if feature.location.end.position < start:
                # Out of frame (too far left)
                continue
            if feature.location.start.position > end:
                # Out of frame (too far right)
                continue

            # Note that I am using strings for color names, instead
            # of passing in color objects.  This should also work!
            if len(gds_features) % 2 == 0:
                color = "white"  # for testing the automatic black border!
            else:
                color = "red"
            # Checking it can cope with the old UK spelling colour.
            # Also show the labels perpendicular to the track.
            gds_features.add_feature(feature, colour=color,
                                     sigil="ARROW",
                                     label_position="start",
                                     label_size=8,
                                     label_angle=90,
                                     label=True)

        # And draw it...
        gdd.draw(format='linear', orientation='landscape',
                 tracklines=False, pagesize=(10*cm, 6*cm), fragments=1,
                 start=start, end=end)
        output_filename = os.path.join('Graphics', 'GD_region_linear.pdf')
        gdd.write(output_filename, 'PDF')

        # Also check the write_to_string method matches.  The PDF is read
        # back in binary mode through a context manager so the handle is
        # closed and no text decoding is attempted; line endings are still
        # normalised on both sides as the original comparison did.
        with open(output_filename, "rb") as handle:
            on_disk = handle.read()
        self.assertEqual(
            on_disk.replace(b"\r\n", b"\n"),
            gdd.write_to_string('PDF').replace(b"\r\n", b"\n"))

        output_filename = os.path.join('Graphics', 'GD_region_linear.svg')
        gdd.write(output_filename, 'SVG')

        # Circular with a particular start/end is a bit odd, but by setting
        # circular=False (above) a sweep of 90% is used (a wedge is left out)
        gdd.draw(format='circular',
                 tracklines=False, pagesize=(10*cm, 10*cm),
                 start=start, end=end)
        output_filename = os.path.join('Graphics', 'GD_region_circular.pdf')
        gdd.write(output_filename, 'PDF')
        output_filename = os.path.join('Graphics', 'GD_region_circular.svg')
        gdd.write(output_filename, 'SVG')
示例#7
0
    def test_diagram_via_methods_pdf(self):
        """Build a diagram through the method-style API and write two PDFs.

        Track 1 carries the gene features of ``self.record`` plus strandless
        markers for four restriction sites; track 2 carries GC and AT
        content line graphs.  A linear and a circular PDF are written into
        ``Graphics``.
        """
        record = self.record
        diagram = Diagram("Test Diagram")

        # Feature track with a single feature set for genes and site markers.
        feature_track = diagram.new_track(
            1, greytrack=True, name="CDS Features", greytrack_labels=0,
            height=0.5)
        feature_set = feature_track.new_set()

        for feature in record.features:
            if feature.type != "gene":
                continue
            # Alternate colours according to how many features are in the set.
            gene_color = "lightblue" if len(feature_set) % 2 else "blue"
            feature_set.add_feature(
                feature,
                color=gene_color,
                label_position="start",
                label_size=11,
                sigil="ARROW",
                label=True,
            )

        # Strandless example features: restriction enzyme recognition sites.
        restriction_sites = (
            ("GAATTC", "EcoRI", "green"),
            ("CCCGGG", "SmaI", "orange"),
            ("AAGCTT", "HindIII", "red"),
            ("GGATCC", "BamHI", "purple"),
        )
        for site, name, color in restriction_sites:
            position = record.seq.find(site, start=0)
            while position != -1:
                feature = SeqFeature(
                    FeatureLocation(position, position + 6), strand=None)

                # This URL should work in SVG output from recent versions
                # of ReportLab (2.4 or later).  Without a protein_id
                # qualifier no link is attached.
                try:
                    protein_id = feature.qualifiers["protein_id"][0]
                except KeyError:
                    url = None
                else:
                    url = ("http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi" +
                           "?db=protein&id=%s" % protein_id)

                feature_set.add_feature(
                    feature,
                    color=color,
                    url=url,
                    label_size=10,
                    label_color=color,
                    name=name,
                    label=True,
                )
                # Resume the search just past this hit.
                position = record.seq.find(site, start=position + len(site))

        # Graph track with the two content curves.
        graph_track = diagram.new_track(
            2, greytrack=True, name="AT and GC content",
            greytrack_labels=True)
        graph_set = graph_track.new_set(type="graph")

        step = len(record) // 200
        curves = (
            (calc_gc_content, "GC content",
             colors.lightgreen, colors.darkseagreen),
            (calc_at_content, "AT content", colors.orange, colors.red),
        )
        for calculator, title, main_color, alt_color in curves:
            graph_set.new_graph(
                apply_to_window(record.seq, step, calculator, step),
                title,
                style="line",
                color=main_color,
                altcolor=alt_color,
            )

        # Render the linear layout first, then the circular one.
        diagram.draw(format="linear", orientation="landscape", tracklines=0,
                     pagesize="A4", fragments=3)
        linear_pdf = os.path.join("Graphics", "GD_by_meth_linear.pdf")
        diagram.write(linear_pdf, "PDF")

        diagram.draw(format="circular", tracklines=False, circle_core=0.8,
                     pagesize=(20 * cm, 20 * cm), circular=True)
        circular_pdf = os.path.join("Graphics", "GD_by_meth_circular.pdf")
        diagram.write(circular_pdf, "PDF")
示例#8
0
    def test_partial_diagram(self):
        """Draw only the 6500-8750 region of ``self.record`` as PDF and SVG.

        CDS features overlapping the window are added to a single track.
        The linear PDF written to disk must equal the bytes returned by
        ``write_to_string``; a circular rendering of the same window is
        written afterwards.
        """
        record = self.record
        start, end = 6500, 8750

        # circular=False: for the circular diagram we don't want a closed
        # circle.
        diagram = Diagram("Test Diagram", circular=False)

        cds_track = diagram.new_track(
            1,
            greytrack=True,
            name="CDS Features",
            scale_largetick_interval=1000,
            scale_smalltick_interval=100,
            scale_format="SInt",
            greytrack_labels=False,
            height=0.5,
        )
        # A single feature set holds everything that is drawn.
        cds_set = cds_track.new_set()

        for feature in record.features:
            # Skip non-CDS features and anything entirely left or right of
            # the window (checked in that order).
            if (feature.type != "CDS"
                    or feature.location.end.position < start
                    or feature.location.start.position > end):
                continue

            # This URL should work in SVG output from recent versions
            # of ReportLab (2.4 or later).
            try:
                protein_id = feature.qualifiers["protein_id"][0]
            except KeyError:
                url = None
            else:
                url = ("http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi" +
                       "?db=protein&id=%s" % protein_id)

            # Colour names are given as strings rather than colour objects;
            # "white" exercises the automatic black border.
            fill = "white" if len(cds_set) % 2 == 0 else "red"

            # The UK spelling "colour" is used deliberately, and labels are
            # drawn perpendicular to the track.
            cds_set.add_feature(
                feature,
                colour=fill,
                url=url,
                sigil="ARROW",
                label_position=None,
                label_size=8,
                label_angle=90,
                label=True,
            )

        # Linear rendering of the window.
        diagram.draw(
            format="linear",
            orientation="landscape",
            tracklines=False,
            pagesize=(10 * cm, 6 * cm),
            fragments=1,
            start=start,
            end=end,
        )
        linear_pdf = os.path.join("Graphics", "GD_region_linear.pdf")
        diagram.write(linear_pdf, "PDF")

        # The file on disk must match the write_to_string (bytes) output.
        with open(linear_pdf, "rb") as handle:
            self.assertEqual(handle.read(), diagram.write_to_string("PDF"))

        linear_svg = os.path.join("Graphics", "GD_region_linear.svg")
        diagram.write(linear_svg, "SVG")

        # Circular with a particular start/end is a bit odd, but because the
        # diagram was created with circular=False a sweep of 90% is used
        # (a wedge is left out).
        diagram.draw(
            format="circular",
            tracklines=False,
            pagesize=(10 * cm, 10 * cm),
            start=start,
            end=end,
        )
        circular_pdf = os.path.join("Graphics", "GD_region_circular.pdf")
        diagram.write(circular_pdf, "PDF")
        circular_svg = os.path.join("Graphics", "GD_region_circular.svg")
        diagram.write(circular_svg, "SVG")
示例#9
0
    def test_diagram_via_object_pdf(self):
        """Build a diagram from explicit Track/Set objects and write PDFs.

        Tracks, feature sets and graph sets are created as stand-alone
        objects and wired together by hand (rather than through the
        ``new_track``/``new_set`` shortcuts).  The finished diagram is then
        drawn four times - circular and linear, whole record and the
        3000-6300 region - each time written to a PDF under ``Graphics``.
        """
        record = self.record
        diagram = Diagram("Test Diagram")

        cds_track = Track(
            "CDS features",
            greytrack=True,
            scale_largetick_interval=1e4,
            scale_smalltick_interval=1e3,
            greytrack_labels=10,
            greytrack_font_color="red",
            scale_format="SInt",
        )
        gene_track = Track(
            "gene features",
            greytrack=1,
            scale_largetick_interval=1e4,
        )

        # Feature sets: two "backdrop" sets that carry the cross-link end
        # points, followed by the four sets holding the real features.
        cds_backdrops = FeatureSet(name="CDS backgrounds")
        gene_backdrops = FeatureSet(name="gene background")

        cds_arrows = FeatureSet(name="CDS features")
        gene_arrows = FeatureSet(name="gene features")
        misc_boxes = FeatureSet(name="misc_features")
        repeat_boxes = FeatureSet(name="repeat regions")

        # Link every CDS to the gene feature seen most recently before it
        # (assumed to belong together).  Shades alternate with CDS parity:
        # index 0 is used for even counts, index 1 for odd counts.
        shades = (
            (colors.peru, colors.tan),
            (colors.burlywood, colors.bisque),
        )
        pending_gene = None
        cds_seen = 0
        for feature in record.features:
            if feature.type == "gene":
                pending_gene = feature
                continue
            if feature.type != "CDS":
                continue
            cds_seen += 1
            if not pending_gene:
                continue
            dark, light = shades[cds_seen % 2]
            cds_span = FeatureLocation(
                feature.location.start, feature.location.end, strand=0
            )
            cds_box = cds_backdrops.add_feature(SeqFeature(cds_span), color=dark)
            gene_span = FeatureLocation(
                pending_gene.location.start, pending_gene.location.end, strand=0
            )
            gene_box = gene_backdrops.add_feature(SeqFeature(gene_span), color=dark)
            diagram.cross_track_links.append(
                CrossLink(cds_box, gene_box, color=light, border=dark)
            )
            # Each gene is paired with at most one CDS.
            pending_gene = None

        # Hand-placed cross links, as rows of
        # (base colour, span on the CDS backdrop, span on the gene backdrop,
        #  flipped?).  Row order is the drawing order.
        manual_links = (
            # Both ends on the same linear diagram fragment.
            (colors.red, (2220, 2230), (2200, 2210), False),
            (colors.blue, (2150, 2200), (2220, 2290), True),
            (colors.green, (2250, 2560), (2300, 2860), False),
            # Both ends saddling the linear diagram fragment boundary;
            # the second is nestled within the first (drawn on top).
            (colors.red, (3155, 3250), (3130, 3300), False),
            (colors.blue, (3160, 3275), (3180, 3225), True),
            # The two ends on either side of the fragment boundary.
            (colors.green, (6450, 6550), (6265, 6365), False),
            (colors.gold, (6265, 6365), (6450, 6550), False),
            (colors.red, (6275, 6375), (6430, 6530), True),
            (colors.blue, (6430, 6530), (6275, 6375), True),
        )
        for base, cds_span, gene_span, flipped in manual_links:
            fill, edge = fill_and_border(base)
            upper = cds_backdrops.add_feature(
                SeqFeature(FeatureLocation(*cds_span)), color=fill, border=edge
            )
            lower = gene_backdrops.add_feature(
                SeqFeature(FeatureLocation(*gene_span)), color=fill, border=edge
            )
            # Only pass ``flip`` when it is requested, so unflipped links
            # rely on the CrossLink default exactly as before.
            link_options = {"flip": True} if flipped else {}
            diagram.cross_track_links.append(
                CrossLink(upper, lower, color=fill, border=edge, **link_options)
            )

        # Now the real features, routed to a set according to their type.
        cds_seen = 0
        for feature in record.features:
            kind = feature.type
            if kind == "CDS":
                cds_seen += 1
                arrow_color = colors.pink if cds_seen % 2 == 0 else colors.red
                cds_arrows.add_feature(feature, color=arrow_color, sigil="ARROW")
            elif kind == "gene":
                # The colour of ALL the genes is set in bulk further down.
                gene_arrows.add_feature(feature, sigil="ARROW")
            elif kind == "misc_feature":
                misc_boxes.add_feature(feature, color=colors.orange)
            elif kind == "repeat_region":
                repeat_boxes.add_feature(feature, color=colors.purple)

        # Bulk attribute changes, exercising set_all_features.
        for feature_set in (cds_arrows, gene_arrows, misc_boxes, repeat_boxes):
            feature_set.set_all_features("label", 1)
        for feature_set in (misc_boxes, repeat_boxes):
            feature_set.set_all_features("hide", 0)
        gene_arrows.set_all_features("color", colors.blue)

        # Backdrops are added first so they end up underneath the features.
        cds_track.add_set(cds_backdrops)
        cds_track.add_set(cds_arrows)

        gene_track.add_set(gene_backdrops)
        gene_track.add_set(gene_arrows)

        misc_track = Track(
            "misc features and repeats",
            greytrack=1,
            scale_largetick_interval=1e4,
        )
        misc_track.add_set(misc_boxes)
        misc_track.add_set(repeat_boxes)

        # Graph sets.  A fairly large window step makes the difference
        # between the bar and line graphs easy to see.
        coarse_step = len(record) // 200
        skew_graphs = GraphSet("GC skew")
        skew_values = apply_to_window(
            record.seq, coarse_step, calc_gc_skew, coarse_step
        )
        skew_graphs.new_graph(
            skew_values,
            "GC Skew",
            style="bar",
            color=colors.violet,
            altcolor=colors.purple,
        )
        skew_track = Track(
            "GC Skew (bar)",
            height=1.94,
            greytrack=1,
            scale_largetick_interval=1e4,
        )
        skew_track.add_set(skew_graphs)

        content_graphs = GraphSet("GC and AT Content")
        content_series = (
            (calc_gc_content, "GC content", colors.lightgreen, colors.darkseagreen),
            (calc_at_content, "AT content", colors.orange, colors.red),
        )
        for calc, title, main_color, alt_color in content_series:
            content_graphs.new_graph(
                apply_to_window(record.seq, coarse_step, calc, coarse_step),
                title,
                style="line",
                color=main_color,
                altcolor=alt_color,
            )
        content_track = Track(
            "GC Content(green line), AT Content(red line)",
            height=1.94,
            greytrack=1,
            scale_largetick_interval=1e4,
        )
        content_track.add_set(content_graphs)

        # The heat map uses a smaller window step than the graphs above.
        dinuc_graphs = GraphSet("Di-nucleotide count")
        fine_step = len(record) // 400
        dinuc_graphs.new_graph(
            apply_to_window(
                record.seq, fine_step, calc_dinucleotide_counts, fine_step
            ),
            "Di-nucleotide count",
            style="heat",
            color=colors.red,
            altcolor=colors.orange,
        )
        dinuc_track = Track(
            "Di-nucleotide count",
            height=0.5,
            greytrack=False,
            scale=False,
        )
        dinuc_track.add_set(dinuc_graphs)

        # Stack the tracks on levels 3..8 (levels 1 and 2 stay empty to
        # leave some white space in the middle/bottom): GC skew, GC/AT
        # content, CDS, genes, misc + repeats, di-nucleotide heat map.
        stacking = (
            skew_track,
            content_track,
            cds_track,
            gene_track,
            misc_track,
            dinuc_track,
        )
        for level, track in enumerate(stacking, start=3):
            diagram.add_track(track, level)

        def render(pdf_name, **draw_options):
            """Draw the diagram with the shared options and save it as PDF."""
            diagram.draw(orientation="landscape", tracklines=0, **draw_options)
            diagram.write(os.path.join("Graphics", pdf_name), "PDF")

        # Finally draw it in both formats, as full view and partial view.
        render("GD_by_obj_circular.pdf", format="circular", pagesize="A0")

        diagram.circular = False
        render(
            "GD_by_obj_frag_circular.pdf",
            format="circular",
            pagesize="A0",
            start=3000,
            end=6300,
        )

        render(
            "GD_by_obj_linear.pdf",
            format="linear",
            pagesize="A0",
            fragments=3,
        )

        diagram.set_all_tracks("greytrack_labels", 2)
        render(
            "GD_by_obj_frag_linear.pdf",
            format="linear",
            pagesize=(30 * cm, 10 * cm),
            fragments=1,
            start=3000,
            end=6300,
        )
示例#10
0
 def setUp(self):
     """Create the non-circular test Diagram with every margin at 0.01."""
     # All six margin keywords share the same value.
     margins = dict.fromkeys(("y", "yt", "yb", "x", "xl", "xr"), 0.01)
     self.gdd = Diagram("Test Diagram", circular=False, **margins)
示例#11
0
from reportlab.lib import colors
from reportlab.lib.units import cm
# Biopython core
from Bio import SeqIO
from Bio.SeqFeature import SeqFeature, FeatureLocation

# Bio.Graphics.GenomeDiagram
from Bio.Graphics.GenomeDiagram import Diagram

################################################################################

# Load the GenBank file that contains the genes.  The file is opened in a
# ``with`` block so the handle is closed as soon as the record is parsed,
# rather than being left open for the rest of the script.
gbk_filename = "Buchnera.gbk"
with open(gbk_filename) as gbk_handle:
    genbank_entry = SeqIO.read(gbk_handle, "genbank")

gdd = Diagram('Test Diagram')

# Add a track of features,
gdt_features = gdd.new_track(
    1,
    greytrack=True,
    name="CDS Features",
    scale_largetick_interval=10000,
    scale_smalltick_interval=1000,
    scale_fontsize=4,
    scale_format="SInt",
    greytrack_labels=False,  # e.g. 5
    height=0.75)

# We'll just use one feature set for these features,
gds_features = gdt_features.new_set()