def draw_me_something_nice(infile, outfile, outfile2):
    """Draw linear and circular genome diagrams for one EMBL record.

    Every feature of the record (except ``source`` and ``gene`` features)
    is drawn twice on a single track: once as a pale "squashed" box
    produced by ``squash_exons`` and once with its real location on top.
    Features whose ``locus_tag`` occurs in one of the hard-coded gene lists
    are re-coloured (effectors red/pink, SPRYSEC blue/lightblue) and
    reported on stdout and in a tab-separated text file.

    Args:
        infile: Path to an EMBL file holding exactly one record
            (it is parsed with ``SeqIO.read``).
        outfile: Path of the PDF written for the linear diagram.
        outfile2: Path of the PDF written for the circular diagram.

    Side effects:
        Writes ``<infile up to '.embl'>effecotr_info.txt`` and, in the
        current working directory, ``GROS_linear.svg`` and
        ``GROS_circ.svg``.

    NOTE(review): ``add_jaggies`` and ``squash_exons`` are defined
    elsewhere; their exact behaviour is not visible from this function.
    """
    # Close the input handle as soon as the record is parsed.
    with open(infile) as handle:
        genbank_entry = SeqIO.read(handle, "embl")
    # NOTE: "effecotr" is misspelt, but the name is kept as-is because
    # downstream tooling may already look for this exact file name.
    name_for_info_out = infile.split(".embl")[0] + "effecotr_info.txt"

    gdd = Diagram('Test Diagram')
    # Add a track of features,
    gdt_features = gdd.new_track(
        1,
        greytrack=True,
        name="CDS Features",
        scale_largetick_interval=100000,
        scale_smalltick_interval=5000,
        scale_fontsize=3,
        scale_format="SInt",
        greytrack_labels=False,  # e.g. 5
        height=0.75)
    # We'll just use one feature set for these features,
    gds_features = gdt_features.new_set()
    add_jaggies(str(genbank_entry.seq), 0, gds_features)

    # Genes of interest.  The lists are split on ANY whitespace so the
    # layout of the literal (one ID per line or many per line) is irrelevant.
    effectors = """#gene
        GPALN001111 GPALN001252 GPALN001912 GPALN002106 GPALN002290 GPALN002295
        GPALN002300 GPALN002383 GPALN002386 GPALN002387 GPALN002593 GPALN002947
        GPALN003010 GPALN003306 GPALN003381 GPALN003415 GPALN003793 GPALN003794
        GPALN003795 GPALN003831 GPALN003952 GPALN003970 GPALN003975 GPALN004254
        GPALN004470 GPALN004493 GPALN004587 GPALN004712 GPALN004734 GPALN004862
        GPALN004897 GPALN005042 GPALN005067 GPALN005090 GPALN005100 GPALN005105
        GPALN005801 GPALN005901 GPALN005903 GPALN005905 GPALN005953 GPALN006035
        GPALN006057 GPALN006059 GPALN006061 GPALN006067 GPALN006752 GPALN006754
        GPALN006755 GPALN006756 GPALN006759 GPALN006766 GPALN006769 GPALN006775
        GPALN006818 GPALN006828 GPALN006839 GPALN006853 GPALN006856 GPALN006945
        GPALN007181 GPALN007436 GPALN007445 GPALN007670 GPALN007708 GPALN007837
        GPALN008101 GPALN009056 GPALN009444 GPALN009458 GPALN009497 GPALN009498
        GPALN009532 GPALN009580 GPALN009586 GPALN009589 GPALN009796 GPALN009815
        GPALN009825 GPALN009837 GPALN009900 GPALN009918 GPALN010093 GPALN010199
        GPALN010232 GPALN010540 GPALN010542 GPALN010554 GPALN010603 GPALN010659
        GPALN010702 GPALN010737 GPALN010789 GPALN010793 GPALN010968
        GPALN010970 GPALN011399 GPALN011823 GPALN011858 GPALN011865 GPALN012007
        GPALN012056 GPALN012064 GPALN012287 GPALN013168 GPALN013277 GPALN013280
        GPALN013347 GPALN013348 GPALN013350 GPALN013383 GPALN013384 GPALN013459
        GPALN013480 GPALN013496 GPALN014034 GPALN014145 GPALN014146 GPALN014235
        GPALN014268 GPALN014324 GPALN014327 GPALN014354 GPALN014355 GPALN014357
        GPALN014378 GPALN014379 GPALN014381 GPALN014395 GPALN014477 GPALN014498
        GPALN014747 GPALN014750 GPALN014881 GPALN015014 GPALN015073 GPALN015211
        GPALN015248 GPALN015280 GPALN015295 GPALN015296 GPALN015298 GPALN015299
        GPALN015301 GPALN015302 GPALN015304 GPALN015309 GPALN015425 GPALN015632
        GPALN016298 GPALN016343 GPALN016360 GPALN016380 GPALN002204 GPALN002370
        GPALN002666 GPALN002991 GPALN003997 GPALN004009 GPALN005554 GPALN007648
        GPALN012415 GPALN013387 GPALN013545 GPALN014713 GPALN015272 GPALN015314
        GPALN015605 GPALN015654 GPALN002028 GPALN002288 GPALN002969 GPALN003083
        GPALN003416 GPALN003852 GPALN004011 GPALN004265 GPALN004480 GPALN004881
        GPALN004901 GPALN005017 GPALN005038 GPALN006124 GPALN007079 GPALN007178
        GPALN009323 GPALN010231 GPALN010636 GPALN011715 GPALN011857 GPALN012062
        GPALN014261 GPALN014271 GPALN014665 GPALN014857 GPALN015013 GPALN015100
        GPALN015116 GPALN015172 GPALN015218 GPALN000707 GPALN001149 GPALN001153
        GPALN001281 GPALN001284 GPALN001315 GPALN001641 GPALN001729 GPALN001738
        GPALN001745 GPALN002294 GPALN002346 GPALN002349 GPALN002377 GPALN002379
        GPALN002466 GPALN002494 GPALN003077 GPALN003222 GPALN003326 GPALN003368
        GPALN003369 GPALN003846 GPALN003860 GPALN003876 GPALN003882 GPALN003891
        GPALN003905 GPALN003908 GPALN003910 GPALN003911 GPALN003912 GPALN003913
        GPALN003925 GPALN003942 GPALN003943 GPALN003946 GPALN003949 GPALN003953
        GPALN003954 GPALN003955 GPALN003977 GPALN003990 GPALN004007 GPALN004008
        GPALN004010 GPALN004014 GPALN004017 GPALN004018 GPALN004064 GPALN004130
        GPALN004342 GPALN004369 GPALN004380 GPALN004410 GPALN004411 GPALN004506
        GPALN004534 GPALN004553 GPALN004554 GPALN004555
        GPALN004557 GPALN004678 GPALN004679 GPALN004681 GPALN004798 GPALN005064
        GPALN005129 GPALN005160 GPALN005161 GPALN005611 GPALN005738 GPALN005986
        GPALN006031 GPALN006038 GPALN006112 GPALN006223 GPALN006413 GPALN006581
        GPALN006596 GPALN006778 GPALN006780 GPALN006782 GPALN006860 GPALN006911
        GPALN007051 GPALN007058 GPALN007072 GPALN007120 GPALN007129 GPALN007132
        GPALN007139 GPALN007179 GPALN007201 GPALN007443 GPALN007647 GPALN007696
        GPALN007748 GPALN007796 GPALN007811 GPALN007848 GPALN007899 GPALN008074
        GPALN008097 GPALN008098 GPALN008100 GPALN008102 GPALN008108 GPALN008152
        GPALN008161 GPALN008462 GPALN008535 GPALN009441 GPALN009443 GPALN009492
        GPALN009505 GPALN009640 GPALN009669 GPALN009670 GPALN009695 GPALN009823
        GPALN009839 GPALN009902 GPALN010067 GPALN010126 GPALN010127 GPALN010171
        GPALN010316 GPALN010321 GPALN010414 GPALN010416 GPALN010432 GPALN010433
        GPALN010511 GPALN010519 GPALN010534 GPALN010536 GPALN010598 GPALN010602
        GPALN010621 GPALN010625 GPALN010778 GPALN010795 GPALN010824 GPALN011797
        GPALN011812 GPALN011852 GPALN012010 GPALN012025 GPALN012067 GPALN012099
        GPALN012127 GPALN012134 GPALN012284 GPALN012357 GPALN012358 GPALN012366
        GPALN012838 GPALN013104 GPALN013109 GPALN013144 GPALN013150 GPALN013385
        GPALN014005 GPALN014368 GPALN014369 GPALN014370 GPALN014371 GPALN014372
        GPALN014377 GPALN014397 GPALN014398 GPALN014514 GPALN014539 GPALN014576
        GPALN014672 GPALN014707 GPALN014746 GPALN014851 GPALN014865 GPALN014866
        GPALN014867 GPALN014868 GPALN014885 GPALN014904 GPALN015061 GPALN015177
        GPALN015178 GPALN015179 GPALN015181 GPALN015182 GPALN015183 GPALN015186
        GPALN015188 GPALN015193 GPALN015243 GPALN015262 GPALN015279 GPALN015285
        GPALN015291 GPALN015297 GPALN015738 GPALN015769 GPALN016090 GPALN016091
        GPALN016117 GPALN016181 GPALN016188 GPALN016330 GPALN016378
    """.split()
    SPRYSEC = """
        GPALN012056.T1 GPALN009532.T1 GPALN003794.T1 GPALN014357.T1
        GPALN010968.T1 GPALN001352.T1 GPALN006035.T1 GPALN007139.T1
        GPALN013168.T1 GPALN006853.T1 GPALN010970.T1 GPALN014477.T1
        GPALN015302.T1 GPALN012007.T1 GPALN015309.T1 GPALN010793.T1
        GPALN006818.T1 GPALN013114.T1 GPALN006860.T1 GPALN009815.T1
        GPALN006839.T1 GPALN006856.T1 GPALN004734.T1 GPALN006596.T1
        GPALN013383.T1 GPALN011823.T1 GPALN012287.T1 GPALN009918.T1
        GPALN014398.T1 GPALN010231.T1 GPALN009669.T1 GPALN010232.T1
        GPALN013348.T1 GPALN013350.T1 GPALN010645.T1 GPALN010093.T1
        GPALN014397.T1 GPALN002288.T1 GPALN002300.T1 GPALN011858.T1
        GPALN015298.T1 GPALN013480.T1 GPALN009458.T1 GPALN010789.T1
        GPALN007168.T1 GPALN008646.T1 GPALN006775.T1 GPALN015295.T1
        GPALN004897.T1 GPALN002290.T1 GPALN015013.T1 GPALN014271.T1
        GPALN015632.T1 GPALN015301.T1 GPALN014355.T1 GPALN007445.T1
        GPALN015280.T1 GPALN007711.T1 GPALN015314.T1 GPALN010569.T1
        GPALN007132.T1 GPALN006828.T1 GPALN004881.T1 GPALN007129.T1
        GPALN013385.T1 GPALN003057.T1 GPALN015407.T1 GPALN004265.T1
        GPALN014395.T1 GPALN012062.T1 GPALN001780.T1 GPALN012064.T1
        GPALN007120.T1 GPALN005953.T1 GPALN003793.T1 GPALN015813.T1
        GPALN016040.T1
    """.split()

    # Build the ordered, de-duplicated list of effector transcript names.
    # The membership test must use the same ".T1"-suffixed key that is
    # stored, otherwise duplicates are never detected.
    names = set()
    effector_list = []
    for gene in effectors:
        if gene.startswith("#"):
            # Column header of the pasted table, not a gene ID.
            continue
        transcript = gene + ".T1"
        if transcript not in names:
            names.add(transcript)
            effector_list.append(transcript)

    count = 0
    with open(name_for_info_out, "w") as f_general_output:
        for feature in genbank_entry.features:
            count = count + 1
            if feature.type in ("source", "gene"):
                # We're going to ignore these (ignore genes as the CDS is
                # enough).
                continue

            shape = "ARROW"
            # color  -> the real feature drawn on top,
            # color2 -> the squashed background box.
            color2 = "grey"
            if feature.type == "tRNA":
                color = "red"
            elif feature.type == "rRNA":
                color = "purple"
            elif feature.type == "gap":
                color = "grey"
                shape = "JAGGY"
                feature.strand = None  # i.e. draw it strandless
            elif feature.type != "CDS":
                color = "lightgreen"
            elif count % 2 == 0:
                # Semi-transparent blue / light blue.
                color = colors.Color(0, 0, 1, 0.4)
                color2 = colors.Color(.678431, .847059, .901961, 0.2)
            else:
                # Semi-transparent green / light green.
                color = colors.Color(0, 0.501961, 0, 0.4)
                color2 = colors.Color(0.564706, 0.933333, 0.564706, 0.2)

            locus_tag = _locus_tag_of(feature)
            if locus_tag is not None:
                # Highlight effectors (substring match of the locus tag
                # against the transcript name, e.g. "X" in "X.T1").
                for gene_name in effector_list:
                    if locus_tag in gene_name:
                        color = "red"
                        color2 = "pink"
                        report = "effector\t%s\t%s\n" % (infile, gene_name)
                        f_general_output.write(report)
                        print(report)
                # SPRYSEC genes are checked second, so their colours win
                # when a gene is in both lists.
                for gene_name in SPRYSEC:
                    if locus_tag in gene_name:
                        color = "blue"
                        color2 = "lightblue"
                        report = "SPRYSEC\t%s\t%s\n" % (infile, gene_name)
                        f_general_output.write(report)
                        print(report)

            gds_features.add_feature(
                squash_exons(feature),
                color=color2,
                sigil="BOX",
                arrowshaft_height=0.8,
                arrowhead_length=0.5,
                label_position="start",
                label_size=1,
                label_angle=90,
                label=True)
            # Don't want the line round the feature as starts to overlap
            gds_features.add_feature(
                feature,
                border=False,
                color=color,
                sigil=shape,
                arrowshaft_height=0.6,
                arrowhead_length=0.5,
                label_position="start",
                label_size=1,
                label_angle=90,
                label=False)

    # And draw it...
    gdd.draw(format='linear', orientation='landscape', tracklines=False,
             pagesize='A4', fragments=10)
    gdd.write(outfile, 'PDF')
    gdd.write("GROS_linear.svg", 'SVG')

    # And a circular version
    # Change the order and leave an empty space in the center:
    gdd.move_track(1, 3)
    gdd.draw(format='circular', tracklines=False,
             pagesize=(30 * cm, 30 * cm))
    gdd.write(outfile2, 'PDF')
    gdd.write("GROS_circ.svg", 'SVG')


def _locus_tag_of(feature):
    """Return the feature's first locus_tag without ';', or None if it has none."""
    tags = feature.qualifiers.get("locus_tag") or [None]
    tag = tags[0]
    if not tag:
        return None
    return tag.replace(";", "")
# Bail out early when any of the three expected inputs is absent from
# input_folder.
for f in (file_a, file_b, file_a_vs_b):
    if os.path.isfile(os.path.join(input_folder, f)):
        continue
    print("Missing input file %s.fna" % f)
    sys.exit(1)

# Only doing a_vs_b here, could also have b_vs_c and c_vs_d etc
genomes = [
    (os.path.join(input_folder, file_a), format_a),
    (os.path.join(input_folder, file_b), format_b),
]
comparisons = [os.path.join(input_folder, file_a_vs_b)]

# Create diagram with tracks, each with a feature set.
# N genomes need exactly N-1 pairwise comparisons between neighbours.
assert len(genomes) >= 2 and len(genomes) == len(comparisons) + 1
gd_diagram = Diagram(name, track_size=0.35, circular=False)

# All three lookups are keyed by the genome's file path.
tracks = {}
feature_sets = {}
records = {}
for f, format in genomes:
    records[f] = SeqIO.read(f, format)
    tracks[f] = gd_diagram.new_track(
        1,
        name=f,
        start=0,
        end=len(records[f]),
        scale_smalltick_interval=1000,
        scale_largetick_interval=10000,
        greytrack=True,
        greytrack_labels=0,
    )
    feature_sets[f] = tracks[f].new_set()
def test_diagram_via_methods_pdf(self):
    """Construct and draw PDF using method approach.

    Builds a diagram from ``self.record`` with one feature track (genes
    plus strandless restriction-site markers) and one graph track (GC and
    AT content), then writes a linear and a circular PDF into the
    ``Graphics`` directory.
    """
    genbank_entry = self.record
    gdd = Diagram('Test Diagram')

    # Add a track of features,
    gdt_features = gdd.new_track(1, greytrack=True,
                                 name="CDS Features",
                                 greytrack_labels=0,
                                 height=0.5)
    # We'll just use one feature set for the genes and misc_features,
    gds_features = gdt_features.new_set()
    for feature in genbank_entry.features:
        if feature.type == "gene":
            # Alternate the colour based on how many features were added.
            if len(gds_features) % 2 == 0:
                color = "blue"
            else:
                color = "lightblue"
            gds_features.add_feature(feature, color=color,
                                     # label_position="middle",
                                     # label_position="end",
                                     label_position="start",
                                     label_size=11,
                                     # label_angle=90,
                                     sigil="ARROW",
                                     label=True)

    # I want to include some strandless features, so for an example
    # will use EcoRI recognition sites etc.
    for site, name, color in [("GAATTC", "EcoRI", "green"),
                              ("CCCGGG", "SmaI", "orange"),
                              ("AAGCTT", "HindIII", "red"),
                              ("GGATCC", "BamHI", "purple")]:
        index = 0
        while True:
            index = genbank_entry.seq.find(site, start=index)
            if index == -1:
                break
            # Span exactly the recognition site (not a hard-coded 6 bp).
            feature = SeqFeature(FeatureLocation(index, index + len(site)),
                                 strand=None)
            gds_features.add_feature(feature, color=color,
                                     # label_position="middle",
                                     label_size=10,
                                     label_color=color,
                                     # label_angle=90,
                                     name=name,
                                     label=True)
            # Resume the search after this (non-overlapping) hit.
            index += len(site)
        del index

    # Now add a graph track...
    gdt_at_gc = gdd.new_track(2, greytrack=True,
                              name="AT and GC content",
                              greytrack_labels=True)
    gds_at_gc = gdt_at_gc.new_set(type="graph")

    # Floor division keeps the window/step an int on Python 3 as well
    # (plain "/" would yield a float there).
    step = len(genbank_entry) // 200
    gds_at_gc.new_graph(apply_to_window(genbank_entry.seq, step,
                                        calc_gc_content, step),
                        'GC content', style='line',
                        color=colors.lightgreen,
                        altcolor=colors.darkseagreen)
    gds_at_gc.new_graph(apply_to_window(genbank_entry.seq, step,
                                        calc_at_content, step),
                        'AT content', style='line',
                        color=colors.orange,
                        altcolor=colors.red)

    # Finally draw it in both formats,
    gdd.draw(format='linear', orientation='landscape',
             tracklines=0, pagesize='A4', fragments=3)
    output_filename = os.path.join('Graphics', 'GD_by_meth_linear.pdf')
    gdd.write(output_filename, 'PDF')

    # Change the order and leave an empty space in the center:
    gdd.move_track(1, 3)
    gdd.draw(format='circular', tracklines=False,
             pagesize=(20 * cm, 20 * cm), circular=True)
    output_filename = os.path.join('Graphics', 'GD_by_meth_circular.pdf')
    gdd.write(output_filename, 'PDF')
def test_diagram_via_object_pdf(self):
    """Construct and draw PDF using object approach.

    Feature sets, graph sets and tracks are created as stand-alone
    objects and then attached to the diagram, which is written as a
    circular and a linear PDF into the ``Graphics`` directory.
    """
    genbank_entry = self.record
    gdd = Diagram('Test Diagram')

    # First add some feature sets:
    gdfs1 = FeatureSet(name='CDS features')
    gdfs2 = FeatureSet(name='gene features')
    gdfs3 = FeatureSet(name='misc_features')
    gdfs4 = FeatureSet(name='repeat regions')

    cds_count = 0
    for feature in genbank_entry.features:
        if feature.type == 'CDS':
            cds_count += 1
            # Alternate pink/red so neighbouring CDS are distinguishable.
            if cds_count % 2 == 0:
                gdfs1.add_feature(feature, color=colors.pink)
            else:
                gdfs1.add_feature(feature, color=colors.red)
        elif feature.type == 'gene':
            gdfs2.add_feature(feature)
        elif feature.type == 'misc_feature':
            gdfs3.add_feature(feature, color=colors.orange)
        elif feature.type == 'repeat_region':
            gdfs4.add_feature(feature, color=colors.purple)

    gdfs1.set_all_features('label', 1)
    gdfs2.set_all_features('label', 1)
    gdfs3.set_all_features('label', 1)
    gdfs4.set_all_features('label', 1)

    gdfs3.set_all_features('hide', 0)
    gdfs4.set_all_features('hide', 0)

    # gdfs1.set_all_features('color', colors.red)
    gdfs2.set_all_features('color', colors.blue)

    gdt1 = Track('CDS features', greytrack=True,
                 scale_largetick_interval=1e4,
                 scale_smalltick_interval=1e3,
                 greytrack_labels=10,
                 greytrack_font_color="red",
                 scale_format="SInt")
    gdt1.add_set(gdfs1)

    gdt2 = Track('gene features', greytrack=1,
                 scale_largetick_interval=1e4)
    gdt2.add_set(gdfs2)

    gdt3 = Track('misc features and repeats', greytrack=1,
                 scale_largetick_interval=1e4)
    gdt3.add_set(gdfs3)
    gdt3.add_set(gdfs4)

    # Now add some graph sets:

    # Use a fairly large step so we can easily tell the difference
    # between the bar and line graphs.
    # Floor division keeps the step an int on Python 3 as well.
    step = len(genbank_entry) // 200
    gdgs1 = GraphSet('GC skew')

    graphdata1 = apply_to_window(genbank_entry.seq, step, calc_gc_skew, step)
    gdgs1.new_graph(graphdata1, 'GC Skew', style='bar',
                    color=colors.violet,
                    altcolor=colors.purple)

    gdt4 = Track('GC Skew (bar)',
                 height=1.94, greytrack=1,
                 scale_largetick_interval=1e4)
    gdt4.add_set(gdgs1)

    gdgs2 = GraphSet('GC and AT Content')
    gdgs2.new_graph(apply_to_window(genbank_entry.seq, step,
                                    calc_gc_content, step),
                    'GC content', style='line',
                    color=colors.lightgreen,
                    altcolor=colors.darkseagreen)

    gdgs2.new_graph(apply_to_window(genbank_entry.seq, step,
                                    calc_at_content, step),
                    'AT content', style='line',
                    color=colors.orange,
                    altcolor=colors.red)

    gdt5 = Track('GC Content(green line), AT Content(red line)',
                 height=1.94, greytrack=1,
                 scale_largetick_interval=1e4)
    gdt5.add_set(gdgs2)

    gdgs3 = GraphSet('Di-nucleotide count')
    step = len(genbank_entry) // 400  # smaller step
    gdgs3.new_graph(apply_to_window(genbank_entry.seq, step,
                                    calc_dinucleotide_counts, step),
                    'Di-nucleotide count', style='heat',
                    color=colors.red, altcolor=colors.orange)
    gdt6 = Track('Di-nucleotide count', height=0.5, greytrack=False,
                 scale=False)
    gdt6.add_set(gdgs3)

    # Add the tracks (from both features and graphs)
    # Leave some white space in the middle
    gdd.add_track(gdt4, 3)  # GC skew
    gdd.add_track(gdt5, 4)  # GC and AT content
    gdd.add_track(gdt1, 5)  # CDS features
    gdd.add_track(gdt2, 6)  # Gene features
    gdd.add_track(gdt3, 7)  # Misc features and repeat feature
    gdd.add_track(gdt6, 8)  # Feature depth

    # Finally draw it in both formats,
    gdd.draw(format='circular', orientation='landscape',
             tracklines=0, pagesize='A0', circular=True)
    output_filename = os.path.join('Graphics', 'GD_by_obj_circular.pdf')
    gdd.write(output_filename, 'PDF')

    gdd.draw(format='linear', orientation='landscape',
             tracklines=0, pagesize='A0', fragments=3)
    output_filename = os.path.join('Graphics', 'GD_by_obj_linear.pdf')
    gdd.write(output_filename, 'PDF')
def setUp(self):
    """Create the shared test diagram: non-circular, with 0.01 margins."""
    # Every margin-related keyword gets the same small value.
    margins = dict(y=0.01, yt=0.01, yb=0.01, x=0.01, xl=0.01, xr=0.01)
    self.gdd = Diagram('Test Diagram', circular=False, **margins)
def test_partial_diagram(self):
    """Construct and draw SVG and PDF for just part of a SeqRecord.

    Only CDS features overlapping the window 6500..8750 of
    ``self.record`` are added; the window is then rendered as linear and
    circular PDF/SVG files in the ``Graphics`` directory.  The linear PDF
    on disk is also compared with the output of ``write_to_string``.
    """
    genbank_entry = self.record
    start = 6500
    end = 8750
    gdd = Diagram('Test Diagram',
                  # For the circular diagram we don't want a closed circle:
                  circular=False,
                  )

    # Add a track of features,
    gdt_features = gdd.new_track(1, greytrack=True,
                                 name="CDS Features",
                                 scale_largetick_interval=1000,
                                 scale_smalltick_interval=100,
                                 scale_format="SInt",
                                 greytrack_labels=False,
                                 height=0.5)
    # We'll just use one feature set for these features,
    gds_features = gdt_features.new_set()
    for feature in genbank_entry.features:
        # "!=" instead of the Python-2-only "<>" operator.
        if feature.type != "CDS":
            # We're going to ignore these.
            continue
        if feature.location.end.position < start:
            # Out of frame (too far left)
            continue
        if feature.location.start.position > end:
            # Out of frame (too far right)
            continue

        # Note that I am using strings for color names, instead
        # of passing in color objects.  This should also work!
        if len(gds_features) % 2 == 0:
            color = "white"  # for testing the automatic black border!
        else:
            color = "red"
        # Checking it can cope with the old UK spelling colour.
        # Also show the labels perpendicular to the track.
        gds_features.add_feature(feature, colour=color,
                                 sigil="ARROW",
                                 label_position="start",
                                 label_size=8,
                                 label_angle=90,
                                 label=True)

    # And draw it...
    gdd.draw(format='linear', orientation='landscape',
             tracklines=False, pagesize=(10 * cm, 6 * cm), fragments=1,
             start=start, end=end)
    output_filename = os.path.join('Graphics', 'GD_region_linear.pdf')
    gdd.write(output_filename, 'PDF')

    # Also check the write_to_string method matches.  The file is read in
    # binary mode (so no newline translation can occur, even on Windows)
    # and the handle is closed deterministically.
    # NOTE(review): assumes write_to_string('PDF') returns the same
    # byte-string type that a binary read yields - confirm for the
    # Biopython version in use.
    with open(output_filename, "rb") as handle:
        self.assertEqual(handle.read(), gdd.write_to_string('PDF'))

    output_filename = os.path.join('Graphics', 'GD_region_linear.svg')
    gdd.write(output_filename, 'SVG')

    # Circular with a particular start/end is a bit odd, but by setting
    # circular=False (above) a sweep of 90% is used (a wedge is left out)
    gdd.draw(format='circular',
             tracklines=False, pagesize=(10 * cm, 10 * cm),
             start=start, end=end)
    output_filename = os.path.join('Graphics', 'GD_region_circular.pdf')
    gdd.write(output_filename, 'PDF')
    output_filename = os.path.join('Graphics', 'GD_region_circular.svg')
    gdd.write(output_filename, 'SVG')
def test_diagram_via_methods_pdf(self):
    """Build a diagram through the Diagram/Track factory methods and write PDFs.

    One feature track holds the record's genes plus strandless
    restriction-site markers; a second track plots GC and AT content.
    The result is written as a linear and a circular PDF under
    ``Graphics``.
    """
    record = self.record
    diagram = Diagram("Test Diagram")

    # Track 1: a single feature set shared by genes and restriction sites.
    feature_track = diagram.new_track(
        1, greytrack=True, name="CDS Features", greytrack_labels=0, height=0.5
    )
    feature_set = feature_track.new_set()

    for feat in record.features:
        if feat.type != "gene":
            continue
        # Even number of features so far -> blue, odd -> lightblue.
        shade = "lightblue" if len(feature_set) % 2 else "blue"
        feature_set.add_feature(
            feat,
            color=shade,
            label_position="start",
            label_size=11,
            sigil="ARROW",
            label=True,
        )

    # Strandless example features: restriction enzyme recognition sites.
    enzymes = (
        ("GAATTC", "EcoRI", "green"),
        ("CCCGGG", "SmaI", "orange"),
        ("AAGCTT", "HindIII", "red"),
        ("GGATCC", "BamHI", "purple"),
    )
    for site, enzyme, shade in enzymes:
        pos = record.seq.find(site, start=0)
        while pos != -1:
            site_feature = SeqFeature(FeatureLocation(pos, pos + 6), strand=None)

            # This URL should work in SVG output from recent versions
            # of ReportLab.  You need ReportLab 2.4 or later.
            # A feature without a protein_id qualifier gets no link.
            try:
                url = ("http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi"
                       + "?db=protein&id=%s"
                       % site_feature.qualifiers["protein_id"][0])
            except KeyError:
                url = None

            feature_set.add_feature(
                site_feature,
                color=shade,
                url=url,
                label_size=10,
                label_color=shade,
                name=enzyme,
                label=True,
            )
            # Continue searching after this hit (non-overlapping matches).
            pos = record.seq.find(site, start=pos + len(site))

    # Track 2: GC and AT content as line graphs.
    graph_track = diagram.new_track(
        2, greytrack=True, name="AT and GC content", greytrack_labels=True
    )
    graph_set = graph_track.new_set(type="graph")

    window = len(record) // 200
    gc_points = apply_to_window(record.seq, window, calc_gc_content, window)
    graph_set.new_graph(
        gc_points,
        "GC content",
        style="line",
        color=colors.lightgreen,
        altcolor=colors.darkseagreen,
    )
    at_points = apply_to_window(record.seq, window, calc_at_content, window)
    graph_set.new_graph(
        at_points,
        "AT content",
        style="line",
        color=colors.orange,
        altcolor=colors.red,
    )

    # Render: linear first, then circular.
    diagram.draw(
        format="linear",
        orientation="landscape",
        tracklines=0,
        pagesize="A4",
        fragments=3,
    )
    diagram.write(os.path.join("Graphics", "GD_by_meth_linear.pdf"), "PDF")

    diagram.draw(
        format="circular",
        tracklines=False,
        circle_core=0.8,
        pagesize=(20 * cm, 20 * cm),
        circular=True,
    )
    diagram.write(os.path.join("Graphics", "GD_by_meth_circular.pdf"), "PDF")
def test_partial_diagram(self):
    """Render only the 6500..8750 window of the record as SVG and PDF.

    CDS features overlapping the window are drawn as arrows; the linear
    PDF on disk is checked against ``write_to_string``.
    """
    record = self.record
    start, end = 6500, 8750

    # circular=False: for the circular diagram we don't want a closed circle.
    diagram = Diagram("Test Diagram", circular=False)

    feature_track = diagram.new_track(
        1,
        greytrack=True,
        name="CDS Features",
        scale_largetick_interval=1000,
        scale_smalltick_interval=100,
        scale_format="SInt",
        greytrack_labels=False,
        height=0.5,
    )
    # A single feature set is enough here.
    feature_set = feature_track.new_set()

    for feat in record.features:
        # Skip non-CDS features and anything entirely left or right of
        # the window.
        if (
            feat.type != "CDS"
            or feat.location.end.position < start
            or feat.location.start.position > end
        ):
            continue

        # This URL should work in SVG output from recent versions
        # of ReportLab.  You need ReportLab 2.4 or later
        try:
            link = (
                "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi"
                + "?db=protein&id=%s" % feat.qualifiers["protein_id"][0])
        except KeyError:
            link = None

        # Colour names are given as strings rather than color objects;
        # "white" exercises the automatic black border.
        fill = "red" if len(feature_set) % 2 else "white"

        # "colour" (UK spelling) is deliberate: it must be accepted too.
        # Labels are drawn perpendicular to the track.
        feature_set.add_feature(
            feat,
            colour=fill,
            url=link,
            sigil="ARROW",
            label_position=None,
            label_size=8,
            label_angle=90,
            label=True,
        )

    # Linear rendering of the window.
    diagram.draw(
        format="linear",
        orientation="landscape",
        tracklines=False,
        pagesize=(10 * cm, 6 * cm),
        fragments=1,
        start=start,
        end=end,
    )
    pdf_path = os.path.join("Graphics", "GD_region_linear.pdf")
    diagram.write(pdf_path, "PDF")

    # The bytes on disk must equal what write_to_string produces.
    with open(pdf_path, "rb") as handle:
        self.assertEqual(handle.read(), diagram.write_to_string("PDF"))

    diagram.write(os.path.join("Graphics", "GD_region_linear.svg"), "SVG")

    # Circular with a particular start/end is a bit odd, but by setting
    # circular=False (above) a sweep of 90% is used (a wedge is left out)
    diagram.draw(
        format="circular",
        tracklines=False,
        pagesize=(10 * cm, 10 * cm),
        start=start,
        end=end,
    )
    diagram.write(os.path.join("Graphics", "GD_region_circular.pdf"), "PDF")
    diagram.write(os.path.join("Graphics", "GD_region_circular.svg"), "SVG")
def test_diagram_via_object_pdf(self):
    """Assemble a diagram from stand-alone Track/Set objects and write PDFs.

    Besides the feature and graph tracks, this exercises cross-track
    links between the CDS track and the gene track, and renders full and
    partial views in both circular and linear layouts.
    """
    record = self.record
    diagram = Diagram("Test Diagram")

    cds_track = Track(
        "CDS features",
        greytrack=True,
        scale_largetick_interval=1e4,
        scale_smalltick_interval=1e3,
        greytrack_labels=10,
        greytrack_font_color="red",
        scale_format="SInt",
    )
    gene_track = Track("gene features", greytrack=1, scale_largetick_interval=1e4)

    # Feature sets: two "background" sets carrying the cross-link
    # endpoints, and four sets with the real annotation.
    cds_backgrounds = FeatureSet(name="CDS backgrounds")
    gene_backgrounds = FeatureSet(name="gene background")
    cds_features = FeatureSet(name="CDS features")
    gene_features = FeatureSet(name="gene features")
    misc_features = FeatureSet(name="misc_features")
    repeat_features = FeatureSet(name="repeat regions")

    # Link every CDS to the gene feature seen just before it.
    pending_gene = None
    cds_seen = 0
    for feat in record.features:
        if feat.type == "CDS":
            cds_seen += 1
            if pending_gene:
                # Assuming it goes with this CDS!
                if cds_seen % 2 == 0:
                    dark, light = colors.peru, colors.tan
                else:
                    dark, light = colors.burlywood, colors.bisque
                # Background for CDS,
                cds_box = cds_backgrounds.add_feature(
                    SeqFeature(
                        FeatureLocation(feat.location.start,
                                        feat.location.end,
                                        strand=0)),
                    color=dark,
                )
                # Background for gene,
                gene_box = gene_backgrounds.add_feature(
                    SeqFeature(
                        FeatureLocation(
                            pending_gene.location.start,
                            pending_gene.location.end,
                            strand=0,
                        )),
                    color=dark,
                )
                # Cross link,
                diagram.cross_track_links.append(
                    CrossLink(cds_box, gene_box, light, dark))
                pending_gene = None
        if feat.type == "gene":
            pending_gene = feat

    # Hand-placed cross links: (base colour, span on the CDS track,
    # span on the gene track, flip).  Order matters for drawing order.
    manual_links = (
        # Some cross links on the same linear diagram fragment,
        (colors.red, (2220, 2230), (2200, 2210), False),
        (colors.blue, (2150, 2200), (2220, 2290), True),
        (colors.green, (2250, 2560), (2300, 2860), False),
        # Some cross links where both parts are saddling the linear
        # diagram fragment boundary,
        (colors.red, (3155, 3250), (3130, 3300), False),
        # Nestled within that (drawn on top),
        (colors.blue, (3160, 3275), (3180, 3225), True),
        # Some cross links where two features are on either side of the
        # linear diagram fragment boundary,
        (colors.green, (6450, 6550), (6265, 6365), False),
        (colors.gold, (6265, 6365), (6450, 6550), False),
        (colors.red, (6275, 6375), (6430, 6530), True),
        (colors.blue, (6430, 6530), (6275, 6375), True),
    )
    for base_colour, cds_span, gene_span, flip in manual_links:
        fill, edge = fill_and_border(base_colour)
        cds_box = cds_backgrounds.add_feature(
            SeqFeature(FeatureLocation(*cds_span)), color=fill, border=edge)
        gene_box = gene_backgrounds.add_feature(
            SeqFeature(FeatureLocation(*gene_span)), color=fill, border=edge)
        diagram.cross_track_links.append(
            CrossLink(cds_box, gene_box, color=fill, border=edge, flip=flip))

    # Now the real annotation.
    cds_seen = 0
    for feat in record.features:
        kind = feat.type
        if kind == "CDS":
            cds_seen += 1
            cds_colour = colors.pink if cds_seen % 2 == 0 else colors.red
            cds_features.add_feature(feat, color=cds_colour, sigil="ARROW")
        elif kind == "gene":
            # Note we set the colour of ALL the genes later on as a test,
            gene_features.add_feature(feat, sigil="ARROW")
        elif kind == "misc_feature":
            misc_features.add_feature(feat, color=colors.orange)
        elif kind == "repeat_region":
            repeat_features.add_feature(feat, color=colors.purple)

    # diagram.cross_track_links = diagram.cross_track_links[:1]

    for fset in (cds_features, gene_features, misc_features, repeat_features):
        fset.set_all_features("label", 1)
    for fset in (misc_features, repeat_features):
        fset.set_all_features("hide", 0)

    # cds_features.set_all_features('color', colors.red)
    gene_features.set_all_features("color", colors.blue)

    cds_track.add_set(cds_backgrounds)  # Before CDS so under them!
    cds_track.add_set(cds_features)

    gene_track.add_set(gene_backgrounds)  # Before genes so under them!
    gene_track.add_set(gene_features)

    misc_track = Track("misc features and repeats", greytrack=1,
                       scale_largetick_interval=1e4)
    misc_track.add_set(misc_features)
    misc_track.add_set(repeat_features)

    # Graph sets.  A fairly large step makes the bar and line graphs easy
    # to tell apart.
    step = len(record) // 200
    skew_set = GraphSet("GC skew")
    skew_points = apply_to_window(record.seq, step, calc_gc_skew, step)
    skew_set.new_graph(
        skew_points,
        "GC Skew",
        style="bar",
        color=colors.violet,
        altcolor=colors.purple,
    )
    skew_track = Track("GC Skew (bar)", height=1.94, greytrack=1,
                       scale_largetick_interval=1e4)
    skew_track.add_set(skew_set)

    content_set = GraphSet("GC and AT Content")
    content_set.new_graph(
        apply_to_window(record.seq, step, calc_gc_content, step),
        "GC content",
        style="line",
        color=colors.lightgreen,
        altcolor=colors.darkseagreen,
    )
    content_set.new_graph(
        apply_to_window(record.seq, step, calc_at_content, step),
        "AT content",
        style="line",
        color=colors.orange,
        altcolor=colors.red,
    )
    content_track = Track(
        "GC Content(green line), AT Content(red line)",
        height=1.94,
        greytrack=1,
        scale_largetick_interval=1e4,
    )
    content_track.add_set(content_set)

    dinuc_set = GraphSet("Di-nucleotide count")
    step = len(record) // 400  # smaller step
    dinuc_set.new_graph(
        apply_to_window(record.seq, step, calc_dinucleotide_counts, step),
        "Di-nucleotide count",
        style="heat",
        color=colors.red,
        altcolor=colors.orange,
    )
    dinuc_track = Track("Di-nucleotide count", height=0.5, greytrack=False,
                        scale=False)
    dinuc_track.add_set(dinuc_set)

    # Add the tracks (from both features and graphs)
    # Leave some white space in the middle/bottom
    diagram.add_track(skew_track, 3)  # GC skew
    diagram.add_track(content_track, 4)  # GC and AT content
    diagram.add_track(cds_track, 5)  # CDS features
    diagram.add_track(gene_track, 6)  # Gene features
    diagram.add_track(misc_track, 7)  # Misc features and repeat feature
    diagram.add_track(dinuc_track, 8)  # Feature depth

    # Finally draw it in both formats, and full view and partial
    diagram.draw(format="circular", orientation="landscape", tracklines=0,
                 pagesize="A0")
    diagram.write(os.path.join("Graphics", "GD_by_obj_circular.pdf"), "PDF")

    diagram.circular = False
    diagram.draw(
        format="circular",
        orientation="landscape",
        tracklines=0,
        pagesize="A0",
        start=3000,
        end=6300,
    )
    diagram.write(os.path.join("Graphics", "GD_by_obj_frag_circular.pdf"), "PDF")

    diagram.draw(
        format="linear",
        orientation="landscape",
        tracklines=0,
        pagesize="A0",
        fragments=3,
    )
    diagram.write(os.path.join("Graphics", "GD_by_obj_linear.pdf"), "PDF")

    diagram.set_all_tracks("greytrack_labels", 2)
    diagram.draw(
        format="linear",
        orientation="landscape",
        tracklines=0,
        pagesize=(30 * cm, 10 * cm),
        fragments=1,
        start=3000,
        end=6300,
    )
    diagram.write(os.path.join("Graphics", "GD_by_obj_frag_linear.pdf"), "PDF")
def setUp(self):
    """Create the non-circular test diagram with 0.01 margins on every side."""
    # All six margin keywords share the same value.
    margin = 0.01
    self.gdd = Diagram(
        "Test Diagram",
        circular=False,
        y=margin,
        yt=margin,
        yb=margin,
        x=margin,
        xl=margin,
        xr=margin,
    )
from reportlab.lib import colors
from reportlab.lib.units import cm

# Biopython core
from Bio import SeqIO
from Bio.SeqFeature import SeqFeature, FeatureLocation

# Bio.Graphics.GenomeDiagram
from Bio.Graphics.GenomeDiagram import Diagram

################################################################################

# Load the GenBank file that contains the genes.  The handle is closed as
# soon as the single record has been parsed.
gbk_filename = "Buchnera.gbk"
with open(gbk_filename) as gbk_handle:
    genbank_entry = SeqIO.read(gbk_handle, "genbank")

gdd = Diagram('Test Diagram')

# Add a track of features,
gdt_features = gdd.new_track(
    1,
    greytrack=True,
    name="CDS Features",
    scale_largetick_interval=10000,
    scale_smalltick_interval=1000,
    scale_fontsize=4,
    scale_format="SInt",
    greytrack_labels=False,  # e.g. 5
    height=0.75)

# We'll just use one feature set for these features,
gds_features = gdt_features.new_set()