def main(name):
    """Draw the gene features of ``Genome.gb`` as a circular JPG diagram.

    Reads one GenBank record from ``Genome.gb``, places every ``gene``
    feature on a single track with alternating colours and writes
    ``tomato_curly_stunt_virus.JPG``.

    Args:
        name: Not used by this function.
    """
    genome = SeqIO.read("Genome.gb", "genbank")

    # Gene features only; the colour alternates with the number already added.
    gene_set = GenomeDiagram.FeatureSet()
    for feat in genome.features:
        if feat.type == "gene":
            shade = colors.lightblue if len(gene_set) % 2 else colors.purple
            gene_set.add_feature(
                feat, color=shade, label=True, label_size=14, label_angle=10
            )

    # Assemble track and diagram bottom-up.
    feature_track = GenomeDiagram.Track(name="Annotated Features")
    diagram = GenomeDiagram.Diagram("Curly Tomato Stunt Virus")
    feature_track.add_set(gene_set)
    diagram.add_track(feature_track, 1)

    side = 20 * cm
    diagram.draw(
        format="circular",
        circular=True,
        pagesize=(side, side),
        start=0,
        end=len(genome),
        circle_core=0.7,
    )
    diagram.write("tomato_curly_stunt_virus.JPG", "JPG")
def drawsvg(clust):
    """Write one linear SVG diagram per entry of ``clust``.

    Each entry ``clust[k]`` is read through its ``'gene'``, ``'start'`` and
    ``'end'`` keys. The output file name is ``<pref>.<k>.svg`` where ``pref``
    is a name defined outside this function.
    """
    for cluster_id in clust:
        cluster = clust[cluster_id]
        genes = cluster['gene']

        arrows = GenomeDiagram.FeatureSet()
        for gene_key in genes:
            arrows.add_feature(
                genes[gene_key],
                color=colors.lightgreen,
                label=True,
                label_size=10,
                label_angle=90,
                sigil="BIGARROW",
                arrowshaft_height=0.5,
                arrowhead_length=0.25,
            )

        cluster_track = GenomeDiagram.Track(name=cluster_id)
        diagram = GenomeDiagram.Diagram()
        cluster_track.add_set(arrows)
        diagram.add_track(cluster_track, 1)

        # Page width scales with the cluster span: one cm per 1000 positions.
        span = cluster['end'] - cluster['start']
        width = float(span) / float(1000)
        diagram.draw(
            format="linear",
            orientation="landscape",
            pagesize=(width * cm, 5 * cm),
            fragments=1,
            start=cluster['start'],
            end=cluster['end'],
        )
        diagram.write('.'.join([pref, cluster_id, 'svg']), "SVG")
def bottom_up():
    """Build a GenomeDiagram for ``NC_005816.gb`` from the bottom up.

    The feature set is created first, then a track, then the diagram, and the
    pieces are attached to each other explicitly. Nothing is drawn, written
    or returned.
    """
    from reportlab.lib import colors
    from reportlab.lib.units import cm
    from Bio.Graphics import GenomeDiagram
    from Bio import SeqIO

    plasmid = SeqIO.read("NC_005816.gb", "genbank")

    # Feature set holding only the gene features, in alternating colours.
    gene_set = GenomeDiagram.FeatureSet()
    for feat in plasmid.features:
        if feat.type == "gene":
            shade = colors.lightblue if len(gene_set) % 2 else colors.blue
            gene_set.add_feature(feat, color=shade, label=True)

    # Track and diagram are created standalone ...
    feature_track = GenomeDiagram.Track(name="Annotated Features")
    diagram = GenomeDiagram.Diagram(
        "Yersinia pestis biovar Microtus plasmid pPCP1")

    # ... and then glued together.
    feature_track.add_set(gene_set)
    diagram.add_track(feature_track, 1)
def __init__(self, trackName):
    """Create an empty grey track and feature set labelled ``trackName``."""
    self.trackName = trackName
    self.gdTrack = GenomeDiagram.Track(
        name=self.trackName,
        greytrack=True,
        greytrack_labels=1,
        greytrack_fontsize=33,
    )
    self.gdFeature = GenomeDiagram.FeatureSet()
    # Counters start at zero.
    self.size = 0
    self.nbFeats = 0
def plot_unique_genome_diagram(gbk, unique_loci):
    """Build a circular diagram of CDS features, highlighting unique loci.

    Args:
        gbk: Path to a GenBank file; also used as the diagram name.
        unique_loci: Container of locus tags. A CDS whose first ``locus_tag``
            qualifier is in it is coloured ``#93341F``, all others ``#058F45``.

    Returns:
        The drawn ``GenomeDiagram.Diagram`` (not yet written to disk).

    Raises:
        KeyError: If a CDS feature has no ``locus_tag`` qualifier.
    """
    parser = GenBank.FeatureParser()
    # The context manager closes the handle even when parsing raises.
    with open(gbk, 'r') as fhandle:
        genbank_entry = parser.parse(fhandle)

    gdd = GenomeDiagram.Diagram(gbk)
    gd_track_for_features = gdd.new_track(
        1, name="CDS", scale_smalltick_interval=100000)
    gdfs = gd_track_for_features.new_set()

    # The two colours are constant, so build them once.
    unique_color = rcolors.HexColor("#93341F")
    shared_color = rcolors.HexColor("#058F45")

    for feature in genbank_entry.features:
        if feature.type != 'CDS':
            continue
        # Every CDS is forced onto the forward strand before being added.
        feature.strand = 1
        if feature.qualifiers['locus_tag'][0] in unique_loci:
            gdfs.add_feature(feature, color=unique_color)
        else:
            gdfs.add_feature(feature, color=shared_color)

    gdd.draw(format='circular', orientation='landscape', tracklines=0,
             pagesize='A5', fragments=5, circular=1)
    return gdd
def makeGenomeDiagram(seqFeatureNameToList, genomeDiagramName, seqFeatureToColor = None):
    '''Build a GenomeDiagram with one track (ring) per named feature list.

    INPUT:
    seqFeatureNameToList: dictionary ring_name --> [ seq_features_within_ring ]
    genomeDiagramName: string used as the name of the diagram.

    optional:
    seqFeatureToColor: dictionary ring_name --> color (a reportlab.lib color
    object). When omitted, every feature is drawn in blue.

    OUTPUT:
    A GenomeDiagram object holding all features in their rings. Rings are
    numbered from 1 in alphabetical order of ring name; reorder them with
    gd_diagram.move_track(old_loc, new_loc).
    '''
    gd_diagram = GenomeDiagram.Diagram(genomeDiagramName)
    for level, ring_name in enumerate(sorted(seqFeatureNameToList), 1):
        ring_set = gd_diagram.new_track(level, name=ring_name).new_set()
        # Add the things we want to actually put ON the circle.
        for feat in seqFeatureNameToList[ring_name]:
            ring_color = colors.blue if seqFeatureToColor is None else seqFeatureToColor[ring_name]
            ring_set.add_feature(
                feat,
                color=ring_color,
                label=False,
                sigil="ARROW",
                arrowshaft_height=0.5,
                name=feat.id,
            )
    return gd_diagram
def main(args):
    """Draw the mRNA features of a GFF region as a linear PDF diagram.

    Args:
        args: Mapping with ``'gffile'`` (path of the GFF file) and ``'genes'``
            (sequence of gene names used to pick the region and to build the
            output file name).

    The output is written to ``<gff basename>_<genes joined by _>.pdf`` in the
    working directory. Progress is printed to stdout.
    """
    gfhash = gff3.get_gff_hash(args['gffile'])
    gid, Diagstart, Diagstop = get_coordinates_for_diagram(
        gfhash, args['genes'])
    # Single-argument print() gives the same output on Python 2 and 3.
    print("%s %s %s" % (gid, Diagstart, Diagstop))

    gd_diagram = GenomeDiagram.Diagram(gid)
    gd_track_for_features = gd_diagram.new_track(1, name="Annotated Genes")
    gd_feature_set = gd_track_for_features.new_set()

    for gf in sorted(gfhash[gid], key=lambda x: x.start):
        if gf.ftype != 'mRNA':
            continue
        # Normalise coordinates so that start <= stop.
        if gf.start > gf.stop:
            gf.start, gf.stop = gf.stop, gf.start
        # Skip features entirely outside the drawn window.
        if gf.stop < Diagstart or gf.start > Diagstop:
            continue
        feature_id = gf.get_attributes()['ID']
        # Clip to the window; '+'/'-' becomes +1/-1 via int('+1') / int('-1').
        # NOTE(review): any other strand symbol (e.g. '.') raises ValueError here.
        f = SeqFeature(FeatureLocation(max([gf.start, Diagstart]),
                                       min([gf.stop, Diagstop])),
                       strand=int(gf.strand + '1'),
                       type=feature_id)
        gd_feature_set.add_feature(f, label=True, label_size=10,
                                   label_angle=0, sigil="ARROW")
        print("%s %s %s" % (feature_id, gf.start, gf.stop))

    gd_diagram.draw(start=Diagstart, end=Diagstop, format='linear',
                    fragments=1, pagesize=(100 * cm, 4 * cm))
    # str.join replaces string.join, which no longer exists in Python 3.
    outfile = os.path.split(args['gffile'])[1] + "_" + "_".join(
        args['genes']) + '.pdf'
    gd_diagram.write(outfile, "PDF")
    print(outfile)
def draw_genome_map(infile, filename=None):
    """Draw a whole circular genome with CDS, tRNA and GC-content tracks.

    Args:
        infile: GenBank file holding a single record.
        filename: Output PNG path; defaults to ``'genediagram.png'``.

    Returns:
        The path the PNG was written to.
    """
    from Bio.Graphics import GenomeDiagram
    from Bio.SeqUtils import GC
    from reportlab.lib import colors

    genome = SeqIO.read(infile, 'genbank')
    gdd = GenomeDiagram.Diagram('test')
    gdt1 = gdd.new_track(4, greytrack=1, name='CDS', scale=0)
    gdt2 = gdd.new_track(3, greytrack=1, name='tRNA', scale=0)
    gdt3 = gdd.new_track(2, greytrack=0, name='GC content', scale=0)
    gdf1 = gdt1.new_set('feature')
    gdf2 = gdt2.new_set('feature')
    gdgs = gdt3.new_set('graph')

    # One graph point per feature: (feature start, GC value of its sequence).
    graphdata = [(f.location.start, GC(f.extract(genome.seq)))
                 for f in genome.features]
    gdgs.new_graph(graphdata, 'GC content', style='line', colour=colors.black)

    for feature in genome.features:
        if feature.type == 'CDS':
            gdf1.add_feature(feature, label=False, colour=colors.green)
        elif feature.type == 'tRNA':
            gdf2.add_feature(feature, label=True, colour=colors.red)

    gdd.draw(format='circular', orientation='landscape', tracklines=0,
             pagesize='A4', fragments=5, circular=1)
    # Identity test: None is a singleton.
    if filename is None:
        filename = 'genediagram.png'
    gdd.write(filename, "PNG")
    return filename
def draw_by_bio():
    """Draw a circular test diagram and write ``GD_labels_shorts_1.pdf``.

    Relies on ``names``, ``intervals``, ``record``, ``phase_1`` and
    ``phase_2`` being defined outside this function. The locations of the
    first eight features of ``record`` are rescaled in place.
    """
    diagram = GenomeDiagram.Diagram('Test Diagram')

    def _track(track_level):
        """Add one track at ``track_level`` and fill it with features."""
        fset = diagram.new_track(track_level, greytrack=False).new_set()

        # Named forward-strand features built from the interval list.
        for label, bounds in zip(names, intervals):
            fset.add_feature(
                SeqFeature(FeatureLocation(*bounds, strand=1)),
                name=label, label=True, label_angle=90)

        # First eight record features, coordinates divided by a fixed factor
        # and moved to the reverse strand.
        eta = 4.8045
        for idx, rec_feat in enumerate(record.features[:8]):
            old = rec_feat.location
            rec_feat.location = FeatureLocation(
                int(old.start / eta), int(old.end / eta), strand=-1)
            if idx % 2 == 0:
                shade = colors.blue
            else:
                shade = colors.lightblue
            fset.add_feature(rec_feat, color=shade, label=True)

    _track(1)
    diagram.draw(format='circular', circular=True,
                 pagesize=(15 * cm, 15 * cm), fragments=1,
                 start=0, end=phase_1 + phase_2, circle_core=.9)
    diagram.write("GD_labels_shorts_1.pdf", "pdf")
def minimal_feats():
    """Draw three labelled features showing the strand options.

    Writes ``GD_labels_default.pdf`` with a forward, a strandless and a
    reverse feature on one circular track.
    """
    from Bio.SeqFeature import SeqFeature, FeatureLocation
    from Bio.Graphics import GenomeDiagram
    from reportlab.lib.units import cm

    diagram = GenomeDiagram.Diagram('Test Diagram')
    track = diagram.new_track(1, greytrack=False)
    fset = track.new_set()

    # (label, start, end, strand) for each demonstration feature.
    examples = (
        ("Forward", 25, 125, +1),
        ("Strandless", 150, 250, None),
        ("Reverse", 275, 375, -1),
    )
    for label, begin, finish, strand in examples:
        feat = SeqFeature(FeatureLocation(begin, finish), strand=strand)
        fset.add_feature(feat, name=label, label=True)

    diagram.draw(format='circular', circular=True,
                 pagesize=(15 * cm, 15 * cm), fragments=1,
                 start=0, end=400, circle_core=.8)
    diagram.write("GD_labels_default.pdf", "pdf")
def top_down():
    """Build a diagram for ``NC_005816.gb`` top-down and write a circular PDF.

    The diagram is created first; the track and feature set are obtained from
    it via ``new_track`` / ``new_set``. Output: ``plasmid_circular.pdf``.
    """
    from reportlab.lib import colors
    from reportlab.lib.units import cm
    from Bio.Graphics import GenomeDiagram
    from Bio import SeqIO

    plasmid = SeqIO.read("NC_005816.gb", "genbank")
    diagram = GenomeDiagram.Diagram(
        "Yersinia pestis biovar Microtus plasmid pPCP1")
    gene_set = diagram.new_track(1, name="Annotated Features").new_set()

    # Only gene features are drawn, in alternating blue / light blue.
    for feat in plasmid.features:
        if feat.type == "gene":
            shade = colors.lightblue if len(gene_set) % 2 else colors.blue
            gene_set.add_feature(feat, color=shade, label=True)

    diagram.draw(format="circular", circular=True, orientation="landscape",
                 pagesize='A4', start=0, end=len(plasmid), circle_core=.8)
    diagram.write("plasmid_circular.pdf", "PDF")
def plot_genomic_regions(locustagfile, genomedb, pypdir, span=50000, hl_groups=None, labels=False):
    """Build a multi-track diagram of genomic regions around given loci.

    Args:
        locustagfile: Tab-separated file; the first two columns of each line
            are read into a per-strain record.
        genomedb: Directory containing ``genome_metadata.txt``.
        pypdir: Directory containing ``locustag_matrix.txt``.
        span: Passed through to ``_make_tracks``.
        hl_groups: Passed through to ``_find_homologs``; defaults to an empty
            list.
        labels: Passed through to ``_make_tracks``.

    Returns:
        The populated ``GenomeDiagram.Diagram``.
    """
    # A fresh list per call avoids sharing one default list between calls.
    if hl_groups is None:
        hl_groups = []

    strains = []
    with open(os.path.abspath(locustagfile), 'r') as tag_handle:
        for line in tag_handle:
            vals = line.rstrip().split("\t")
            strains.append([vals[0], vals[1]])

    # Append, to every strain whose first field matches metadata column 3,
    # the part of metadata column 7 that precedes the first '-'.
    metadata_path = os.path.join(os.path.abspath(genomedb), "genome_metadata.txt")
    with open(metadata_path, 'r') as meta_handle:
        for line in meta_handle:
            vals = line.rstrip().split("\t")
            for strain in strains:
                if strain[0] == vals[2]:
                    strain.append(vals[6].split("-")[0])

    GD = GenomeDiagram.Diagram('gbk', "temp.pdf")
    count = 1
    locus_tags = {}
    # Strains are processed in reverse order, one track number per strain.
    for g in reversed(strains):
        if g[0] not in locus_tags:
            locus_tags[g[0]] = []
        contigseq, coords = _parse_genbank(g, genomedb)
        _make_tracks(contigseq, span, coords, g, GD, count, locus_tags, labels)
        count += 1

    groups = _find_homologs(GD, locus_tags,
                            os.path.join(pypdir, "locustag_matrix.txt"),
                            hl_groups, set(x[0] for x in strains))
    _change_colors(GD, groups)
    return GD
def original_draw():
    """Draw a fixed slice of ``Gthg_TM242_v3.0.gb`` as a linear diagram.

    Features whose product contains ``hypo`` or ``transposase`` are drawn
    unlabelled in light blue / light grey; every other feature is blue and
    labelled with its product, truncated to 20 characters plus ``...``.
    The diagram is written as PDF, EPS, SVG and PNG to hard-coded paths.
    """
    region = get_genome("Gthg_TM242_v3.0.gb")[0][1077101:1137446]
    diagram = GenomeDiagram.Diagram(region.id)
    arrows = diagram.new_track(1, name="Annotated Features").new_set()

    for feat in region.features:
        product = feat.qualifiers['product'][0]
        lowered = product.lower()
        if 'hypo' in lowered:
            arrows.add_feature(feat, sigil="ARROW", color=colors.lightblue, label=False)
            continue
        if 'transposase' in lowered:
            arrows.add_feature(feat, sigil="ARROW", color=colors.lightgrey, label=False)
            continue
        caption = product[0:20] + '...' if len(product) > 20 else product
        arrows.add_feature(feat, sigil="ARROW", color=colors.blue, label=True,
                           name=caption, label_size=8, label_angle=30)

    diagram.draw(format="linear", pagesize='LEGAL', orientation='landscape',
                 fragments=4, start=0, end=len(region))

    outputs = (
        ("C:\\Users\\Cass\\Desktop\\Geo 2\\hemicellulose\\hemi.pdf", "PDF"),
        ("C:\\Users\\Cass\\Desktop\\Geo 2\\hemicellulose\\hemi.eps", "EPS"),
        ("C:\\Users\\Cass\\Desktop\\Geo 2\\hemicellulose\\hemi.svg", "SVG"),
        ("C:\\Users\\Cass\\Desktop\\Geo 2\\hemicellulose\\hemi.png", "PNG"),
    )
    for target, fmt in outputs:
        diagram.write(target, fmt)
def draw_diagram(genomes_dict, og_id, n_context, colors):
    '''Draw one linear track per genome and write ``<og_id>_<n_context>.png``.

    genomes_dict maps a genome name to a sequence of gene records. For each
    gene, items 1, 2 and 3 are used as start, end and strand; the last item
    selects the colour from ``colors`` and, together with the second-to-last
    item, forms the label. Tracks are numbered from 0 in iteration order.
    '''
    diagram = GenomeDiagram.Diagram('Test Diagram')
    for level, (genome_name, gene_rows) in enumerate(genomes_dict.items()):
        arrows = diagram.new_track(level, greytrack=True, name=genome_name).new_set()
        for row in gene_rows:
            strand = row[3]
            arrows.add_feature(
                SeqFeature(FeatureLocation(row[1], row[2]), strand=strand),
                name=f"{row[-1]} {row[-2]}",
                label=True,
                color=colors[row[-1]],
                label_angle=choose_angle(strand),
                sigil="ARROW",
                arrowshaft_height=1,
                label_size=10,
                label_position="middle",
            )
    diagram.draw(format='linear', fragments=1)
    diagram.write(f"{og_id}_{n_context}.png", "png")
def draw_gene(sequence):
    """Draw the gene features of a GenBank file as a circular PDF.

    Args:
        sequence: GenBank file (path or handle) holding a single record.

    Gene features are drawn as arrows in alternating blue / light blue and the
    result is written to ``plasmid.pdf``.
    """
    record = SeqIO.read(sequence, "genbank")
    diagram = GenomeDiagram.Diagram(record.id)
    feature_track = diagram.new_track(1, name="Annotated Features")
    feature_set = feature_track.new_set()

    for feature in record.features:
        if feature.type != "gene":
            # Exclude this feature
            continue
        color = colors.blue if len(feature_set) % 2 == 0 else colors.lightblue
        feature_set.add_feature(feature, sigil="ARROW", label_size=14,
                                color=color, label=True)

    # The format name was misspelled "cirular"; spell out the intended value.
    # NOTE(review): the misspelling appears to have fallen through to the
    # circular drawer anyway — confirm against the installed Biopython.
    diagram.draw(format="circular", circular=True, pagesize=(50*cm, 50*cm),
                 fragments=1, start=0, end=len(record), circle_core=0.5)
    diagram.write("plasmid.pdf", "PDF")
def motifGenomeDiagram(pckname, filetype=('finalExpression', 'annotation', 'TFset')):
    """Draw motif positions per gene as arrows and write ``<pckname>_motifs.pdf``.

    Args:
        pckname: Dataset name passed to ``readData``; also the diagram name
            and output file prefix.
        filetype: Three data-set keys, read by position: expression data,
            annotation table, transcription-factor set.

    One track is created per unique gene in the annotation table. Each motif
    becomes a BIGARROW whose shaft height is its ``percent`` value and whose
    colour is picked at random per transcription factor.
    """
    from reportlab.lib import colors
    from reportlab.lib.units import cm
    from Bio.Graphics import GenomeDiagram
    from Bio.SeqFeature import SeqFeature, FeatureLocation

    wt = readData(pckname, filetype[0])
    d = readData(pckname, filetype[1])
    # Strand is recoded: 0 becomes -1, anything else becomes 1.
    d['strand'] = [-1 if i == 0 else 1 for i in d.strand]
    tfset = readData(pckname, filetype[2])

    # Drop colour names containing these words, then sample one colour per
    # unique transcription factor.
    excluded_words = ('pale', 'light', 'white', 'snow', 'ivory')
    cols = [col for col in colors.getAllNamedColors()
            if not any(word in col for word in excluded_words)]
    cols = np.random.choice(cols, len(tfset.tf_name.unique()))

    gdd = GenomeDiagram.Diagram(pckname)
    # The set of genes does not change inside the loop; compute it once.
    genes = d.gene.unique()
    n_genes = len(genes)
    for gene in genes:
        this_gene = d[d.gene == gene]
        gdt_features = gdd.new_track(n_genes - gene, greytrack=False)
        gds_features = gdt_features.new_set()
        for motif in this_gene.axes[0]:
            strand = this_gene.strand[motif]
            tf_index = this_gene.tf_ind[motif]
            position = this_gene.positions[motif]
            angle = 0 if strand == 1 else 180
            # Motif length is taken from the consensus of the matching factor.
            mot_size = len(tfset.consensus[tfset.tf_name == tf_index][0])
            feature = SeqFeature(
                FeatureLocation(int(position), int(position + mot_size - 1)),
                strand=strand)
            gds_features.add_feature(
                feature, name=str(tf_index), label=True,
                label_position="start", label_size=6, label_angle=angle,
                color=cols[tf_index], sigil="BIGARROW",
                arrowshaft_height=this_gene.percent[motif],
                arrowhead_length=1)

    gdd.draw(format='linear', pagesize=(15*cm, 10*cm), fragments=1, start=0,
             end=len(wt.seqs[0]), orientation='portrait', tracklines=0)
    gdd.write(pckname + "_motifs.pdf", "pdf")
def plot_simple_region(region_record, out_name):
    """Draw the CDS features of a region as arrows and write an SVG.

    Args:
        region_record: Sequence record whose ``features`` are drawn; its
            ``name`` labels the track and its length sets the drawn range.
        out_name: Path of the SVG file to write.
    """
    gd_diagram = GenomeDiagram.Diagram("geomic_region")
    gd_track_for_features = gd_diagram.new_track(
        1, name=region_record.name, greytrack=True, height=0.5,
        start=0, end=len(region_record))
    gd_feature_set = gd_track_for_features.new_set()

    color1 = colors.HexColor('#40F13A')
    color2 = colors.HexColor('#0F600C')

    for feature in region_record.features:
        if feature.type != "CDS":
            continue
        # Skip CDS features without a locus_tag qualifier. The original note
        # describes these as pseudogenes that are CDS but have no protein ID.
        if "locus_tag" not in feature.qualifiers:
            continue
        color = color1 if len(gd_feature_set) % 2 == 0 else color2
        gd_feature_set.add_feature(
            feature, sigil="ARROW", color=color, label=True,
            label_position="middle", label_strand=1, label_size=10,
            label_angle=40)

    # Page is 250 units in one dimension and 1/30 of the record length in the
    # other; orientation follows whichever is larger.
    hauteur = 250
    largeur = len(region_record) / 30
    orientation = 'portrait' if hauteur > largeur else 'landscape'
    gd_diagram.draw(format="linear", pagesize=(hauteur, largeur),
                    orientation=orientation, fragments=1,
                    start=0, end=len(region_record))
    gd_diagram.write(out_name, "SVG")
def export_PlasmidMap(input_file):
    """Write linear and circular maps of the CDS features of a GenBank file.

    Outputs ``plasmid_linear.pdf``, ``plasmid_linear.png`` and
    ``plasmid_circular.pdf`` in the working directory. The feature set and
    the record length are printed to stdout along the way.
    """
    plasmid = SeqIO.read(input_file, "genbank")
    diagram = GenomeDiagram.Diagram(plasmid.id)
    cds_set = diagram.new_track(1, name="Annotated Features").new_set()
    print(cds_set)
    print(len(plasmid))

    # CDS features only, alternating light blue / blue.
    for feat in plasmid.features:
        if feat.type == "CDS":
            shade = colors.blue if len(cds_set) % 2 else colors.lightblue
            cds_set.add_feature(
                feat,
                sigil="ARROW",
                color=shade,
                label_size=12,
                label_angle=0,
                label=True,
            )
    print(cds_set)

    record_length = len(plasmid)

    # Linear map
    diagram.draw(
        format="linear",
        orientation="landscape",
        pagesize="A4",
        fragments=4,
        start=0,
        end=record_length,
    )
    for target, fmt in (("plasmid_linear.pdf", "PDF"), ("plasmid_linear.png", "PNG")):
        diagram.write(target, fmt)

    # Circular map
    diagram.draw(
        format="circular",
        circular=True,
        pagesize=(35 * cm, 30 * cm),
        start=0,
        end=record_length,
        circle_core=0.5,
    )
    diagram.write("plasmid_circular.pdf", "PDF")
def __init__(self, name):
    """Initialise an empty graph with a new GenomeDiagram called ``name``."""
    super(PhyloGraph, self).__init__()
    self.name = name
    self.gd_diagram = GenomeDiagram.Diagram(name)
    # Containers and the running maximum length start out empty / zero.
    self.genome_set = []
    self.features = []
    self.max_len = 0
def make_diagram(genome_record: SeqIO.SeqRecord, pseudo_record: SeqIO.SeqRecord, outfile: str):
    """Plot the genome on one track and its pseudogenes on a second track.

    Gene features of ``genome_record`` go on track 1 in alternating blue /
    light blue; every feature of ``pseudo_record`` goes on track 2 in
    alternating red / light coral. The circular diagram is written to
    ``outfile`` as PDF.
    """

    def _striped_set(features, even_colour, odd_colour):
        """Return a FeatureSet whose features alternate between two colours."""
        striped = GenomeDiagram.FeatureSet()
        for feat in features:
            shade = odd_colour if len(striped) % 2 else even_colour
            striped.add_feature(feat, color=shade)
        return striped

    diagram = GenomeDiagram.Diagram()

    # Track 1: gene features from the original GenBank record.
    original_features = _striped_set(
        (feat for feat in genome_record.features if feat.type == "gene"),
        colors.blue, colors.lightblue)
    track_for_original_features = GenomeDiagram.Track(
        name="Original Features",
        scale_largetick_interval=100000,
        scale_largeticks=5,
        scale_fontangle=180,
        scale_fontsize=10)
    track_for_original_features.add_set(original_features)
    diagram.add_track(track=track_for_original_features, track_level=1)

    # Track 2: all features from the pseudogene annotation.
    pseudo_features = _striped_set(
        pseudo_record.features, colors.red, colors.lightcoral)
    track_for_pseudogenes = GenomeDiagram.Track(
        name="Pseudogenes", scale_largetick_labels=0)
    track_for_pseudogenes.add_set(pseudo_features)
    diagram.add_track(track=track_for_pseudogenes, track_level=2)

    diagram.draw(format="circular", circular=True, start=0,
                 end=len(genome_record), circle_core=0.8)
    diagram.write(filename=outfile, output="PDF")
def __init__(self,name):
    """Create a ToxinGraphics object wrapping a new GenomeDiagram.

    The diagram gets a single track ("Annotated Features") with one empty
    feature set; no record is attached yet.
    """
    self.name = name
    self.record = None

    diagram = GenomeDiagram.Diagram(name)
    track = diagram.new_track(1, name="Annotated Features")
    self.gd_diagram = diagram
    self.gd_track_for_features = track
    self.gd_feature_set = track.new_set()
def drawFeatures(tracks, highlight=(), filename=None, color=None, pagesize=(600, 200), name=''):
    """Draw gene features in multiple tracks for general comparison.

    Args:
        tracks: Sequence of feature lists, one list per track. The drawn
            range is taken from the locations of the first list only.
        highlight: Features to draw in red.
        filename: Output path without extension; defaults to ``'genediagram'``.
        color: Colour for every non-highlighted feature. When None, the
            colour is ``typecolors[i]`` for the i-th track.
        pagesize: Page size passed to ``Diagram.draw``.
        name: Name given to every track.

    Returns:
        The path of the PNG that was written (``filename + '.png'``).
    """
    from reportlab.lib import colors
    from Bio.Graphics import GenomeDiagram

    gd_diagram = GenomeDiagram.Diagram("test1")
    locs = [f.location for f in tracks[0]]
    start = min([l.start for l in locs])
    end = max([l.end for l in locs])

    for i, features in enumerate(tracks):
        track = gd_diagram.new_track(
            1, greytrack=True, greytrack_labels=2, name=name,
            scale_fontsize=10, scale_fontangle=0,
            scale_largetick_interval=1500, scale_color=colors.gray,
            scale_largeticks=1, scale_smallticks=0, greytrack_fontsize=20)
        gd_feature_set = track.new_set()
        for feature in features:
            # typecolors is only consulted when no explicit colour was given.
            c = color if color is not None else typecolors[i]
            if feature in highlight:
                c = 'red'
            if feature.type != 'CDS':
                continue
            gd_feature_set.add_feature(
                feature, color=c, label=True, sigil="ARROW",
                arrowhead_length=0.5, arrowshaft_height=0.2, tracklines=1,
                label_size=12, label_angle=45)

    gd_diagram.draw(format='linear', orientation="landscape",
                    pagesize=pagesize, fragments=1, start=start, end=end)
    if filename is None:
        filename = 'genediagram'
    gd_diagram.write(filename + '.png', "PNG")
    return filename + '.png'
def __init__(self):
    """Create an empty one-track diagram and set the default view state."""
    diagram = GenomeDiagram.Diagram('Diagram')
    track = diagram.new_track(1, greytrack=False)
    self.gdd = diagram
    self.gdt_features = track
    self.gds_features = track.new_set()

    # Default drawing range and fragment count.
    self.start, self.end = 1, 1000
    self.frag = 1

    # BLAST counters.
    self.blast_num = 0
    self.blast_rownum = 10
def make_diagrams():
    """Draw cluster hits and assembly gaps on a circular genome map.

    Uses ``gbk_file``, ``flanking_regions_and_hits``, ``df`` and
    ``folder_name`` defined outside this function. Each cluster is a box
    from the 'Minimum' of its first row to the 'Maximum' of its second row;
    each run of 20 ``N`` characters in the sequence is marked as a gap. The
    result is written to ``<folder_name>/overall.pdf``.
    """
    record = SeqIO.read(gbk_file, "genbank")
    gd_diagram = GenomeDiagram.Diagram(record.id)
    gdt_features = gd_diagram.new_track(
        0, scale=1, scale_largeticks=-2, height=1, greytrack=1,
        scale_largetick_interval=100000)
    gds_features = gdt_features.new_set()

    # SeqIO.read above only succeeds for a single-record file, so the former
    # per-cluster re-parse of the file ran its body exactly once; the clusters
    # are now added directly.
    for key in range(len(flanking_regions_and_hits)):
        flank = flanking_regions_and_hits[key]
        # NOTE(review): DataFrame.ix is not available in current pandas
        # releases; .loc is probably intended — confirm the index type first.
        hit_min_cord = df.ix[flank[0], 'Minimum']
        hit_max_cord = df.ix[flank[1], 'Maximum']
        if hit_max_cord - hit_min_cord > 0:
            feature = SeqFeature(FeatureLocation(hit_min_cord, hit_max_cord))
            gds_features.add_feature(
                feature, name="Cluster_{}".format(key + 1), label_size=15,
                label_position="middle", label_color=colors.green,
                label=True, sigil="BOX")

    # Highlight assembly gaps (runs of 20 N) in red with a blank black label.
    for site, name, color in [("NNNNNNNNNNNNNNNNNNNN", " ", colors.black)]:
        index = 0
        while True:
            index = record.seq.find(site, start=index)
            if index == -1:
                break
            feature = SeqFeature(FeatureLocation(index, index + len(site)))
            gds_features.add_feature(
                feature, color=colors.red, name=name, label=True,
                label_size=8, label_color=color, label_position="middle")
            # Continue searching after this match.
            index += len(site)

    gd_diagram.draw(format="circular", circular=True, circle_core=0.5,
                    pagesize='A4', fragment_size=0, fragments=0, start=0,
                    tracklines=1, track_size=0.5, x=0.05, y=0.05,
                    end=len(record))
    gd_diagram.write("{}/overall.pdf".format(folder_name), "PDF")
def recursive_draw_with_genomeDiagram(dock, clusters_per_page):
    """Draw every not-yet-drawn gene cluster of ``dock``, one page at a time.

    Each page holds up to ``clusters_per_page`` clusters taken from
    ``dock.elems`` starting at ``dock.draw``; a short last page is padded with
    blank tracks. Every page is written as PDF and SVG under
    ``<cwd>/output_<target_gene>/maps_pdf|maps_svg/``.

    Args:
        dock: Object providing ``draw``, ``size``, ``elems``, ``name`` and
            ``max_cluster_len``. ``draw`` and ``max_cluster_len`` are updated.
        clusters_per_page: Number of tracks per page; must be positive.

    Raises:
        ValueError: If there is something to draw and ``clusters_per_page``
            is not positive (the draw counter would never reach the end).
    """
    if dock.draw >= dock.size:  # nothing left to draw
        return
    if clusters_per_page <= 0:
        raise ValueError("clusters_per_page must be a positive integer")

    # One loop pass per page; this replaces one recursive call per page.
    while dock.draw < dock.size:
        cwd = os.getcwd()
        # Dictionary associating colours with gene categories.
        fn_colors = prepare_color_dict()
        # Page number, derived from how many clusters are already drawn.
        iterare = round(dock.draw/clusters_per_page) + 1

        # Next clusters that were not drawn yet.
        next_clusters = dock.elems[dock.draw:(dock.draw + clusters_per_page)]

        gd_diagram = GenomeDiagram.Diagram(dock.name + '_' + repr(iterare))

        for geneCluster in next_clusters:
            # The longest record seen so far is kept on the dock so that
            # clusters on different pages are drawn to the same scale.
            dock.max_cluster_len = max(dock.max_cluster_len,
                                       len(geneCluster.record))

            # One track per gene cluster.
            gd_track_for_features = gd_diagram.new_track(
                1,
                name=(geneCluster.strain_name + '_' + geneCluster.molecule_name),
                greytrack=True, start=0, height=1,
                end=len(geneCluster.record), scale=1, scaleticks=0)

            # digest_features fills the set (coloured genes, labels, ...).
            gd_feature_set = gd_track_for_features.new_set()
            gd_feature_set = digest_features(geneCluster.record, fn_colors,
                                             gd_feature_set)

        # Pad a short page with blank tracks.
        n_blank_tracks = clusters_per_page - len(next_clusters)
        add_blank_tracks(gd_diagram, n_blank_tracks)

        gd_diagram.draw(format="linear", orientation="landscape", fragments=1,
                        start=0, end=dock.max_cluster_len, track_size=0.75,
                        tracklines=0)
        gd_diagram.write(cwd + '/output_' + target_gene + '/maps_pdf/'
                         + dock.name + '_page' + repr(iterare) + '_linear.pdf',
                         "PDF")
        gd_diagram.write(cwd + '/output_' + target_gene + '/maps_svg/'
                         + dock.name + '_page' + repr(iterare) + '_linear.svg',
                         "SVG")

        # Advance the counter to the next page.
        dock.draw += clusters_per_page
def draw_clusters(self):
    """Draw all clusters as linear tracks and save PDF, EPS and SVG files.

    Creates ``self.diagram``, appends one feature set per cluster to
    ``self.fsets``, optionally adds crosslinks, then adds every CDS of every
    cluster as a BIGARROW. Output files are named ``<self.name>.<ext>``.
    """
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print('Creating cluster diagram.')
    # create diagram
    self.diagram = GenomeDiagram.Diagram(self.name)

    # add tracks; all tracks share the length of the longest cluster
    max_len = max(len(c) for c in self.clusters)
    normal_color = colors.grey if self.no_color else colors.black
    mark_color = colors.black if self.no_color else colors.red
    for cluster in self.clusters:
        col = mark_color if cluster.marked else normal_color
        track = self.diagram.new_track(
            1, name=cluster.label, greytrack=1, height=0.4,
            greytrack_fontcolor=col, greytrack_labels=1,
            greytrack_fontsize=self.name_size, scale=False,
            start=0, end=max_len)
        self.fsets.append(track.new_set())

    # add crosslink features
    if self.add_crosslinks:
        self._compose_crosslinks()

    # add CDS-es
    for ci, cluster in enumerate(self.clusters):
        for gene_id, f in enumerate(cluster.CDS):
            # The i-th CDS takes the i-th gene name when one exists.
            if gene_id < len(cluster.genes):
                fname = cluster.genes[gene_id]
            else:
                fname = 'NONE'
            if fname == 'NONE':
                # Unnamed gene: fall back to the feature's own name.
                known = False
                fname = self._feature_name(f, default='')
                fcolor = self.default_color
            else:
                known = True
                fcolor = self.colors.get(fname, self.default_color)
            if self.no_color:
                fcolor = colors.color2bw(fcolor)
            self.fsets[ci].add_feature(
                f, sigil="BIGARROW", color=fcolor,
                border=fcolor if self.no_border else colors.black,
                name=fname, label=True, label_position="middle",
                label_size=self.gene_size if known else self.unknown_gene_size,
                label_color=colors.black if known else colors.grey,
                label_angle=self.gene_angle)

    self.diagram.draw(format="linear", pagesize=self.pagesize, fragments=1,
                      start=0, end=max_len)
    for ptype in ('PDF', 'EPS', 'SVG'):
        dianame = '%s.%s' % (self.name, ptype.lower())
        print('Saving: %s' % dianame)
        self.diagram.write(dianame, ptype)
    print('Done.')
def VisGeneFlk(args):
    """Plot a gene together with its flanking region from a GenBank file.

    Args:
        args: Mapping with the keys ``'gene'`` (locus tag), ``'gbk'`` (input
            GenBank file), ``'len'`` (flanking length), ``'scale'`` (divisor
            turning sequence length into page length in cm) and ``'fmt'``
            (output format, also used as file extension).

    A working directory ``<gene>_flk<len>_wd`` is recreated from scratch. The
    plot is written to ``<gene>_flk<len>bp.<fmt>`` in the current directory.

    Raises:
        IndexError: If no feature carries ``gene`` as a locus tag (provided
            the preceding helper and read steps did not already fail).
    """
    import shutil

    gene_id = args['gene']
    input_gbk = args['gbk']
    flanking_length = args['len']
    plot_scale = args['scale']
    plot_fmt = args['fmt']

    plot_wd = '%s_flk%s_wd' % (gene_id, flanking_length)
    gbk_subset_located_seq = '%s/%s.gbk' % (plot_wd, gene_id)
    gbk_subset_flanking_gene = '%s/%s_%sbp.gbk' % (plot_wd, gene_id, flanking_length)
    plot_file = '%s_flk%sbp.%s' % (gene_id, flanking_length, plot_fmt)

    # Start from an empty working directory. shutil.rmtree replaces a
    # shell-built "rm -r <dir>", whose directory name comes from the caller.
    if os.path.isdir(plot_wd):
        shutil.rmtree(plot_wd)
    os.mkdir(plot_wd)

    # Locate the gene and save the record that contains it.
    dict_value_list = []
    for seq_record in SeqIO.parse(input_gbk, 'genbank'):
        for gene_feature in seq_record.features:
            if 'locus_tag' in gene_feature.qualifiers:
                if gene_id in gene_feature.qualifiers["locus_tag"]:
                    dict_value_list.append([gene_id,
                                            int(gene_feature.location.start),
                                            int(gene_feature.location.end),
                                            gene_feature.location.strand,
                                            len(seq_record.seq)])
                    SeqIO.write(seq_record, gbk_subset_located_seq, 'genbank')

    get_flanking_region(gbk_subset_located_seq, gene_id, flanking_length)

    # get the distance of the gene to contig ends
    gene_1_left_len = dict_value_list[0][1]
    gene_1_right_len = dict_value_list[0][4] - dict_value_list[0][2]

    # read in gbk file
    sequence_record = SeqIO.read(gbk_subset_flanking_gene, "genbank")

    # create an empty diagram
    diagram = GenomeDiagram.Diagram()
    plot_len_cm = len(sequence_record)/plot_scale

    # add tracks to diagram
    # The detailed footnote is built but then replaced by the plain record
    # name, exactly as before.
    track_footnote = '%s (left %sbp, right %sbp)' % (sequence_record.name, gene_1_left_len, gene_1_right_len)
    track_footnote = sequence_record.name
    seq_track = diagram.new_track(
        1, name=track_footnote, greytrack=True, greytrack_labels=1,
        greytrack_font='Helvetica', greytrack_fontsize=12, height=0.35,
        start=0, end=len(sequence_record), scale=True, scale_fontsize=6,
        scale_ticks=1, scale_smalltick_interval=10000,
        scale_largetick_interval=10000)

    # create blank feature set and add gene features to it
    feature_set = seq_track.new_set(type='feature')
    set_contig_track_features(sequence_record, gene_id, feature_set)

    # draw and export
    diagram.draw(format='linear', orientation='landscape',
                 pagesize=(20*cm, plot_len_cm*cm), fragments=1,
                 start=0, end=len(sequence_record))
    diagram.write(plot_file, plot_fmt)
def draw_cluster_gene_diagram(bug, cluster, loci, fig_path):
    """Draw the genes of a cluster as arrows and mark the cluster boundaries.

    Args:
        bug: Object with ``genes`` (each having ``locus_tag``, ``seq_start``,
            ``seq_end``, ``is_complement``, ``function``) and
            ``accession_num``.
        cluster: Pair of positions; ``cluster[0]`` is marked START and
            ``cluster[1]`` END.
        loci: Ordered locus tags to draw. The first and last define the drawn
            range, extended by 500 positions on each side.
        fig_path: Path of the PDF to write.

    Raises:
        IndexError: If ``loci`` is empty.
        KeyError: If a locus tag has no matching gene in ``bug.genes``.
    """
    # Index genes by locus tag once instead of scanning bug.genes per locus.
    # For a duplicated tag the last gene wins, as in a linear scan.
    genes_by_tag = {orf.locus_tag: orf for orf in bug.genes}

    # {locus_tag}: (start, end, is_complement, product)
    data_dict = dict()
    for tag in loci:
        orf = genes_by_tag.get(tag)
        if orf is not None:
            data_dict[tag] = (orf.seq_start, orf.seq_end,
                              orf.is_complement, orf.function)

    d_start = data_dict[loci[0]][0]
    d_end = data_dict[loci[-1]][1]
    s_tick_int = int((d_end - d_start) / 5)

    # create an empty genome diagram
    gdd = GenomeDiagram.Diagram(bug.accession_num)
    gdt_features = gdd.new_track(
        1, greytrack=True, scale_smalltick_interval=s_tick_int,
        scale_smalltick_labels=True, scale_smallticks=0.1,
        scale_fontangle=0, scale_fontsize=4, name=bug.accession_num)
    gds_features = gdt_features.new_set()

    # for each locus, annotate
    for orf in loci:
        start, end, is_complement, product = data_dict[orf]
        # 'Y' marks a complement-strand gene; its label goes on the other side.
        if is_complement == 'Y':
            strand, angle, pos = -1, -195, 'right'
        else:
            strand, angle, pos = +1, 15, 'left'
        feature = SeqFeature(FeatureLocation(int(start), int(end)),
                             strand=strand)
        gds_features.add_feature(
            feature, name=orf + ": " + product, label=True, sigil="ARROW",
            label_size=4, arrowhead_length=0.2, label_angle=angle,
            label_position=pos, arrowshaft_height=0.3)

    # for the cluster, annotate inversion positions with one-position boxes
    for position, caption in ((cluster[0], '   START'), (cluster[1], '   END')):
        feature = SeqFeature(FeatureLocation(position, position + 1), strand=0)
        gds_features.add_feature(
            feature, name=caption, label=True, color="purple",
            label_position="left", label_angle=45, sigil='BOX',
            label_color='purple', label_size=6)

    # draw the graph
    gdd.draw(format='linear', pagesize=(16 * cm, 10 * cm), fragments=1,
             start=d_start-500, end=d_end+500)
    gdd.write(fig_path, "pdf")
def seq_plot(sequence: Amplicon, title: str = ""):
    """Render an amplicon with its regions and primers; return it as a string.

    Target regions (blue) are added first, then excluded regions (red), then
    forward and reverse primers as green labelled arrows. The return value is
    whatever ``Diagram.write_to_string()`` yields with its default format.
    """
    diagram = GenomeDiagram.Diagram(title)
    track = diagram.new_track(
        1,
        greytrack=False,
        scale_smalltick_interval=10,
        scale_largetick_interval=100,
    )
    fset = track.new_set()

    # One full pass per region type keeps all targets ahead of all exclusions.
    region_styles = (
        (TargetRegion, "Target", colors.blue),
        (ExcludedRegion, "Excluded", colors.red),
    )
    for region_type, caption, shade in region_styles:
        for feat in sequence.features:
            if type(feat) is region_type:
                fset.add_feature(feat, name=caption, color=shade, label=False)

    # Primers are numbered from 0 within each direction.
    primer_groups = (
        ("Fwd ", sequence.primer_set_fwd),
        ("Rev ", sequence.primer_set_rev),
    )
    for prefix, primers in primer_groups:
        for idx, primer in enumerate(primers):
            fset.add_feature(
                primer,
                name=prefix + str(idx),
                color=colors.green,
                label=True,
                sigil="ARROW",
                label_size=11,
                arrowhead_length=0.25,
            )

    diagram.draw(
        format="linear",
        pagesize=(42 * cm, 7 * cm),
        fragments=1,
        start=0,
        end=len(sequence),
    )
    return diagram.write_to_string()
def make_region_drawing(genelocs, getcolor, centergenename, maxwidth):
    '''Draw a gene neighbourhood as a PNG and return the image path.

    genelocs: features to draw; each needs ``id``, ``location``, ``strand``
        and a ``cluster_id`` qualifier.
    getcolor: mapping from the ``cluster_id`` qualifier value to a colour.
    centergenename: id of the gene of interest. It gets a red border (others
        white), names the output file and anchors the horizontal offset.
    maxwidth: width budget; divided by 20 to obtain the page width.

    The image is written to /tmp/<sanitized centergenename>.png and rotated
    by 180 degrees with the external ``convert`` command when the centre gene
    is on the reverse strand.

    TODO make auto-del tempfiles, or pass svg as string
    '''
    imgfileloc = "/tmp/%s.png" %(sanitizeString(centergenename, False))

    # Fresh diagram with a single track and feature set.
    diagram = GenomeDiagram.Diagram("Genome Region")
    arrows = diagram.new_track(1, name="Annotated Features").new_set()

    # Basic properties of the figure.
    shaft = 0.3
    head = 0.3
    fontsize = 30  # font size for genome diagram labels
    scale = 20  # AA per px for the diagram

    # One arrow per feature; remember the centre gene's position and strand.
    for feat in genelocs:
        is_center = feat.id == centergenename
        if is_center:
            centerdstart = int(feat.location.start)
            centerend = int(feat.location.end)
            centerdstrand = feat.strand
        arrows.add_feature(
            feat,
            name=feat.id,
            color=getcolor[feat.qualifiers["cluster_id"]],
            border=rcolors.red if is_center else rcolors.white,
            sigil="ARROW",
            arrowshaft_height=shaft,
            arrowhead_length=head,
            label=False,
            label_angle=20,
            label_size=fontsize,
        )

    start, end = regionlength(genelocs)
    pagew_px = maxwidth / scale

    # Offsets so the gene of interest lines up in all the figures.
    midcentergene = abs(centerend - centerdstart)/2 + min(centerdstart, centerend)
    l2mid = abs(midcentergene - start)
    r2mid = abs(midcentergene - end)
    half_page = pagew_px/2
    roffset = float(half_page - (l2mid/scale))
    loffset = float(half_page - (r2mid/scale))

    diagram.draw(format="linear", start=start, end=end, fragments=1,
                 pagesize=(225, pagew_px),
                 xl=(loffset/pagew_px), xr=(roffset/pagew_px))
    diagram.write(imgfileloc, "PNG")

    # Flip for reversed genes.
    if centerdstrand == -1:
        os.system("convert -rotate 180 %s %s" % (imgfileloc, imgfileloc))
    return imgfileloc