Exemplo n.º 1
0
 def _compose_crosslink(self, index, result, features1, features2):
     """Add each feature pair of ``result`` to its set and cross-link them.

     ``result`` yields ``(feature1, feature2, colour)`` triples.  Triples
     whose first feature is ``None`` are skipped.  Every remaining pair is
     added to ``features1`` / ``features2`` with the colour used for both
     fill and border, and a ``CrossLink`` between the two added features is
     appended to ``self.diagram.cross_track_links``.

     ``index`` is accepted but not used here.
     """
     for first, second, shade in result:
         if first is None:
             continue
         # With self.no_color set, the colour goes through colors.color2bw.
         shade = colors.color2bw(shade) if self.no_color else shade
         drawn_first = features1.add_feature(first, color=shade, border=shade)
         drawn_second = features2.add_feature(second, color=shade, border=shade)
         link = CrossLink(drawn_first, drawn_second, shade, shade)
         self.diagram.cross_track_links.append(link)
Exemplo n.º 2
0
            flip = not flip
            q_start, q_end = q_end, q_start
        if s_start > s_end:
            flip = not flip
            s_start, s_end = s_end, s_start
        if flip:
            c = colors.Color(0, 0, 1, alpha=0.25)
            b = False
        else:
            c = colors.Color(1, 0, 0, alpha=0.25)
            b = False
        q_feature = q_set.add_feature(SeqFeature(FeatureLocation(q_start-1, q_end)),
                                                 color=c, border=b)
        s_feature = s_set.add_feature(SeqFeature(FeatureLocation(s_start-1, s_end)),
                                                 color=c, border=b)
        gd_diagram.cross_track_links.append(CrossLink(q_feature, s_feature, c, b))
        #NOTE: We are using the same colour for all the matches,
        #with transparency. This means overlaid matches will appear darker.
        #It also means the drawing order is not very important.
        #Note that ACT puts long hits at the back, and colours by hit score.
    handle.close()

print("Drawing CDS features...")
for f, format in genomes:
    record = records[f]
    feature_set = feature_sets[f]
    #Mark the CDS features
    for cds in record.features:
        if cds.type != "CDS":
            continue
        feature_set.add_feature(cds, sigil="ARROW",
Exemplo n.º 3
0
        if flip:
            c = colors.Color(0, 0, 1, alpha=0.25)
            b = False
        else:
            c = colors.Color(1, 0, 0, alpha=0.25)
            b = False
        q_feature = q_set.add_feature(SeqFeature(
            FeatureLocation(q_start - 1, q_end)),
                                      color=c,
                                      border=b)
        s_feature = s_set.add_feature(SeqFeature(
            FeatureLocation(s_start - 1, s_end)),
                                      color=c,
                                      border=b)
        gd_diagram.cross_track_links.append(
            CrossLink(q_feature, s_feature, c, b))
        #NOTE: We are using the same colour for all the matches,
        #with transparency. This means overlaid matches will appear darker.
        #It also means the drawing order is not very important.
        #Note that ACT puts long hits at the back, and colours by hit score.
    handle.close()

print("Drawing CDS features...")
for f, format in genomes:
    record = records[f]
    feature_set = feature_sets[f]
    #Mark the CDS features
    for cds in record.features:
        if cds.type != "CDS":
            continue
        feature_set.add_feature(cds,
Exemplo n.º 4
0
def crosslinks(fileName, GenBank_1, GenBank_2):
    """Draw two GenBank records as linear tracks joined by BLAST cross-links.

    Both records are written out as FASTA (``writeFasta``), compared with
    ``ublastfeatures``, and the hits returned by ``getCrossLinks`` are drawn
    as cross-links between the features of the two tracks.  The user is asked
    on the console whether one gene product should be highlighted.  The
    finished diagram is written to ``fileName + ".pdf"``.

    Args:
        fileName: diagram name; also the stem of the PDF that is written.
        GenBank_1: path of the first GenBank file (record "A").
        GenBank_2: path of the second GenBank file (record "B").
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        ask = raw_input
    except NameError:
        ask = input

    gd_diagram = GenomeDiagram.Diagram(fileName)
    max_len = 0

    # Read both records and create the FASTA files to be compared by Ublast.
    # NOTE(review): [:-3] assumes a three-character ending such as ".gb";
    # a ".gbk" path would yield "name..fasta" -- confirm the expected input.
    A_rec = SeqIO.read(GenBank_1, 'gb')
    fasta_file_name_A = GenBank_1[:-3] + ".fasta"
    writeFasta(GenBank_1, fasta_file_name_A)
    B_rec = SeqIO.read(GenBank_2, 'gb')
    fasta_file_name_B = GenBank_2[:-3] + ".fasta"
    writeFasta(GenBank_2, fasta_file_name_B)

    # Create the tab file with the Ublast output and read the hits from it.
    blastfile = ublastfeatures(fasta_file_name_A, fasta_file_name_B)
    A_vs_B = getCrossLinks(blastfile)

    # Print the list of BLAST results for reference.
    for item in A_vs_B:
        print(item)

    # Ask the user for a gene name to highlight.
    gene_search = ask(
        "would you like to highlight a specific gene name?\n \t1) Yes\n \t2) No\n"
    )
    if gene_search == "1" or gene_search.lower() == "yes":
        gene_highlight = ask(
            "What is the name of the gene you would like to highlight?\n")
        print(gene_highlight + " will be printed in red on the genome diagram, all other genes will be grey")
    else:
        gene_highlight = "NONE"

    # Colours cycled through, one per CDS, when no gene is highlighted.
    cycle_colors = (yellow, orange, brown, lightblue, purple, green)

    # Create new features for concatenations in both records.
    # NOTE(review): the pattern is 36 characters long but the feature spans
    # 35 bases (start .. start + 35) -- kept as is, confirm the intent.
    concat_site = re.compile('NNNNNCACACACTTAATTAATTAAGTGTGTGNNNNN')
    for rec in (A_rec, B_rec):
        for hit in concat_site.finditer(str(rec.seq)):
            rec.features.append(
                SeqFeature(FeatureLocation(hit.start(),
                                           hit.start() + 35,
                                           strand=-1),
                           id="Concat",
                           type="CDS",
                           qualifiers={'product': 'Concat'}))

    # Read in the lists of gene names and types; only the first row of each
    # CSV file is consulted below.
    with open('Backbones_2_Clean.csv', 'r') as hand1:
        backbone = list(csv.reader(hand1))
    with open('AntibioticResistanceGenesClean.csv', 'r') as hand2:
        An_Re = list(csv.reader(hand2))

    # Add each CDS feature to its record's track with a colour and a name.
    cds_seen = 0  # running CDS count over both records, drives the cycle
    for record in (A_rec, B_rec):
        max_len = max(max_len, len(record))
        gd_track_for_features = gd_diagram.new_track(1,
                                                     name=record.name,
                                                     greytrack=True,
                                                     start=0,
                                                     end=len(record))
        gd_feature_set = gd_track_for_features.new_set()

        for feature in record.features:
            if feature.type != "CDS":
                continue
            # Choose the colour of the annotation based on the gene name.
            try:
                Gname = feature.qualifiers['product'][0]
            except KeyError:  # no gene name: draw it grey
                Gname = 'No Name'
                geneColor = grey
            else:
                if Gname == gene_highlight:
                    geneColor = red
                elif gene_highlight == "NONE":
                    geneColor = cycle_colors[cds_seen % len(cycle_colors)]
                elif Gname in backbone[0]:  # backbone genes
                    geneColor = blue
                elif Gname in An_Re[0]:  # antibiotic resistance genes
                    geneColor = green
                elif Gname == 'Tnp' or Gname == 'Int':  # transposase/integrase
                    geneColor = orange
                else:
                    geneColor = grey
            gd_feature_set.add_feature(feature,
                                       sigil="BIGARROW",
                                       arrowhead_length=.25,
                                       color=geneColor,
                                       label=True,
                                       name=Gname,
                                       label_position="start",
                                       label_size=6,
                                       label_angle=45)
            cds_seen += 1

    # Both tracks were requested at level 1; the links below take record A
    # from level 2 and record B from level 1.
    track_X = gd_diagram.tracks[2]
    track_Y = gd_diagram.tracks[1]

    # Add the cross-links so they point to their features in the diagram.
    for score, id_X, id_Y in A_vs_B:
        try:
            feature_X = get_feature(A_rec.features, id_X)
            feature_Y = get_feature(B_rec.features, id_Y)
            color = colors.linearlyInterpolatedColor(colors.white,
                                                     colors.firebrick, 0, 100,
                                                     score)
            link_xy = CrossLink(
                (track_X, feature_X.location.start, feature_X.location.end),
                (track_Y, feature_Y.location.start, feature_Y.location.end),
                color, colors.lightgrey)
            print("Link made\t")
            gd_diagram.cross_track_links.append(link_xy)
        except KeyError:
            # for those pesky nameless genes
            print("Feature qualifier for crosslink not found")

    gd_diagram.draw(format="linear",
                    pagesize=(1200, 2400),
                    fragments=1,
                    start=0,
                    end=max_len)
    print(max_len)
    gd_diagram.write(fileName + ".pdf", "PDF")
Exemplo n.º 5
0
        break

# ==========================================================================
# Build links
# ==========================================================================

# Add one cross-link per entry of ``links`` that is not in ``merged_links``.
# ``.items()`` and ``print(...)`` are used so the script parses on both
# Python 2 and Python 3; the loop variable no longer shadows builtin ``id``.
for link_id, link in links.items():
    if link_id in merged_links:
        continue
    feat1 = features[link["fid1"]]
    feat2 = features[link["fid2"]]
    # Disabled filter, kept for reference:
    # if "NapDC1_B04" not in name_mapping[feat1["cid"]] and "NapDC1_B04" not in name_mapping[feat2["cid"]]:
    #     continue
    color = colors.Color(50, 50, 50, alpha=0)
    # The last argument asks for a flipped link when the two features point
    # in different directions.
    gd_link = CrossLink((link["track1"], link["start1"], link["end1"]),
                        (link["track2"], link["start2"], link["end2"]),
                        color, colors.lightgrey, feat1["dir"] != feat2["dir"])
    gd_diagram.cross_track_links.append(gd_link)

print("Number of links: " + str(len(gd_diagram.cross_track_links)))
# ==========================================================================
# Write to file
# ==========================================================================

gd_diagram.draw(format="linear", pagesize="A1")
gd_diagram.write(mapping_filename + ".svg", output="SVG", dpi=600)


Exemplo n.º 6
0
                             alpha=(hsp.ident_pct / 200.0))
        loc = FeatureLocation(offset + hsp.query_start,
                              offset + hsp.query_end,
                              strand=0)
        q = gd_contig_features.add_feature(SeqFeature(loc),
                                           color=color,
                                           border=border)
        r_offset = ref_offsets[hack_ncbi_fasta_name(hsp.hit_id)]
        loc = FeatureLocation(r_offset + hsp.hit_start,
                              r_offset + hsp.hit_end,
                              strand=0)
        h = gd_record_features.add_feature(SeqFeature(loc),
                                           color=color,
                                           border=border)
        gd_diagram.cross_track_links.append(
            CrossLink(q, h, color, border, flip))

#Now add the unmatched contigs on outside
position = 0
gd_contig_features = None
unplaced = 0
for contig in SeqIO.parse(assembly_fasta, "fasta"):
    contig_id = contig.id
    if contig_id in contigs_shown:
        continue
    #print("Adding unmapped contig %s (len %i bp), offset now %i" % (contig_id, contig_len, position))
    unplaced += 1
    contig_len = len(contig)
    if output_fasta:
        if min_len <= contig_len:
            fasta_handle.write(contigs.get_raw(contig_id))
Exemplo n.º 7
0
        gB1.add_feature(i, label=False,\
      label_position="start",color=color_atual)

# Mark the syntenic stretches on the figure.
# Each entry of ``blast`` is a tab-separated line whose first five fields are
# read as query start, query end, subject start, subject end and identity.
for b in blast:
    fields = b.split("\t")  # split once instead of once per field
    qstart = int(fields[0])
    qend = int(fields[1])
    sstart = int(fields[2])
    send = int(fields[3])
    identidade = float(fields[4]) / 100
    # identidade = float(fields[4]) * 0.8 / 100

    # Detect inversions: the signs of the two spans tell whether query and
    # subject run in the same direction.
    qinv = qend - qstart
    sinv = send - sstart

    if (qinv > 0 and sinv > 0) or (qinv < 0 and sinv < 0):
        # Same direction: the identity is passed as the fourth Color argument.
        cor = colors.Color(1, .341176, .341176, identidade)
    else:
        cor = colors.firebrick

    # Only hits whose identity field is at least 100 are linked.
    if identidade >= 1:
        gd.cross_track_links.append(
            CrossLink((gA, qstart, qend), (gB, sstart, send), color=cor))

gd.draw(format="linear", pagesize=(8*cm, 29.7*cm), fragments=1)

gd.write(name + ".pdf", "PDF")
Exemplo n.º 8
0
    def draw_alignment(self, _gdd, query_id, query_length, blast_hits):
        """Add a reference track and a query track to ``_gdd`` and link them.

        The reference track shows every entry of ``self.annotation`` plus a
        'PCR start' marker at ``self.start``.  The query track (named
        ``query_id``, ending at ``query_length``) shows one box per entry of
        ``blast_hits``.  ``_gdd.draw`` is called once after the reference
        track is built and again at the end.

        Annotations are read as ``annot[0]``/``annot[1]`` (start/end),
        ``annot[2]`` (name) and ``annot[-1]`` (strand, '1' or '-1').  Hits
        are read as ``r[0]``/``r[1]`` (span on the query track),
        ``r[2]``/``r[3]`` (span on the other track, presumably the
        reference), ``r[4]`` (value given to ``self.scale_color``) and
        ``r[-1]`` (strand).
        """
        # --- reference track -------------------------------------------
        ref_track = _gdd.new_track(1,
                                   greytrack=False,
                                   start=1,
                                   end=self.ref_length)
        ref_set = ref_track.new_set()
        max_length = 0
        for annot in self.annotation:
            strand_flag = annot[-1]
            # Anything other than '1' / '-1' means "no strand".
            strand = 1 if strand_flag == '1' else (
                -1 if strand_flag == '-1' else None)
            start = int(annot[0])
            end = int(annot[1])
            max_length = max(max_length, end)
            annotated = SeqFeature(FeatureLocation(start, end),
                                   strand=strand,
                                   id=annot[2])
            ref_set.add_feature(annotated,
                                label=True,
                                name=annot[2],
                                sigil='BOX',
                                label_size=14,
                                label_angle=0,
                                arrowhead_length=0.1,
                                arrowshaft_height=1)
        pcr_marker = SeqFeature(FeatureLocation(self.start, self.start + 1))
        ref_set.add_feature(pcr_marker,
                            label=True,
                            name='PCR start',
                            label_size=14,
                            color='black')
        _gdd.draw(format='linear', fragments=1, start=1, end=max_length)

        # --- query track -----------------------------------------------
        query_track = _gdd.new_track(1,
                                     greytrack=True,
                                     start=0,
                                     end=query_length,
                                     name=query_id)
        query_set = query_track.new_set()
        for hit in blast_hits:
            hit_box = SeqFeature(FeatureLocation(hit[0], hit[1]),
                                 strand=hit[-1])
            query_set.add_feature(hit_box, label=False, sigil='BOX')
        query_track.greytrack_fontcolor = colors.black
        query_track.greytrack_fontsize = 12

        # --- cross links -----------------------------------------------
        for hit in blast_hits:
            scaled = self.scale_color(hit[4])
            shade = colors.linearlyInterpolatedColor(colors.white,
                                                     colors.firebrick, 0, 100,
                                                     scaled)
            # NOTE(review): every hit already got a box in the loop above;
            # this adds a second, identical box to the query set.
            hit_box = SeqFeature(FeatureLocation(hit[0], hit[1]),
                                 strand=hit[-1])
            query_set.add_feature(hit_box, label=False, sigil='BOX')
            # No link for hits that lie entirely before self.start.
            if max(hit[2], hit[3]) < self.start:
                continue
            query_end = (_gdd.tracks[1], hit[0], hit[1])
            other_end = (_gdd.tracks[2], hit[2], hit[3])
            _gdd.cross_track_links.append(
                CrossLink(query_end, other_end, shade))
        _gdd.draw(format='linear', fragments=1)
Exemplo n.º 9
0
def plot_multiple_regions_crosslink2(target_protein_list, region_record_list,
                                     plasmid_list, out_name):
    """Draw stacked genomic regions with links between shared orthogroups.

    Every record gets its own track.  CDS features of neighbouring records
    whose first ``orthogroup`` qualifier value is equal are joined by a
    light-grey cross-link, and every CDS feature that has a ``locus_tag``
    qualifier is drawn as an arrow.  The diagram is written as SVG.

    Args:
        target_protein_list: values looked up in each feature's ``locus_tag``
            qualifier; matching features are drawn in red.
        region_record_list: records to draw; the first record gets the
            highest track level.
        plasmid_list: one flag per record; a truthy flag selects the blue
            arrow colours, a falsy one the green ones.
        out_name: destination passed to ``gd_diagram.write``.
    """
    gd_diagram = GenomeDiagram.Diagram("geomic_region")
    feature_sets = []
    max_len = 0
    n_records = len(region_record_list)
    record_length = [len(record) for record in region_record_list]

    for i, record in enumerate(region_record_list):
        max_len = max(max_len, len(record))
        # Use every other track level, counting down from 2 * n_records - 1
        # (3 and 1 for two records).  The skipped levels are white space that
        # emphasises the cross-links and compresses the tracks vertically.
        gd_track_for_features = gd_diagram.new_track(
            (2 * n_records - 1) - 2 * i,
            name=record.name,
            greytrack=True,
            height=0.5,
            start=0,
            end=len(record))
        # The old "already in feature_sets" branch was removed: it compared a
        # record name with feature-set objects, so it presumably never fired,
        # and its bare ``quit`` did nothing.
        feature_sets.append(gd_track_for_features.new_set())

    # Link CDS features of neighbouring records that share an orthogroup.
    for x in range(len(region_record_list) - 1):
        set_X = feature_sets[x]
        set_Y = feature_sets[x + 1]
        cds_X = [f for f in region_record_list[x].features if f.type == "CDS"]
        cds_Y = [
            f for f in region_record_list[x + 1].features if f.type == "CDS"
        ]
        for feature_1 in cds_X:
            try:
                group1 = feature_1.qualifiers["orthogroup"][0]
            except (KeyError, IndexError):
                # No orthogroup on this side: nothing can match.
                continue
            for feature_2 in cds_Y:
                try:
                    group2 = feature_2.qualifiers["orthogroup"][0]
                except (KeyError, IndexError):
                    continue
                if group1 != group2:
                    continue
                border = colors.lightgrey
                color = colors.lightgrey
                # Strandless copies of both features serve as link anchors.
                F_x = set_X.add_feature(SeqFeature(
                    FeatureLocation(feature_1.location.start,
                                    feature_1.location.end,
                                    strand=0)),
                                        color=color,
                                        border=border)
                F_y = set_Y.add_feature(SeqFeature(
                    FeatureLocation(feature_2.location.start,
                                    feature_2.location.end,
                                    strand=0)),
                                        color=color,
                                        border=border)
                gd_diagram.cross_track_links.append(
                    CrossLink(F_x, F_y, color, border))

    # Draw the CDS arrows of every record.
    for n, record in enumerate(region_record_list):
        gd_feature_set = feature_sets[n]

        if plasmid_list[n]:
            color1 = colors.HexColor('#2837B7')
            color2 = colors.blue
        else:
            color1 = colors.HexColor('#40F13A')
            color2 = colors.HexColor('#0F600C')

        for feature in record.features:
            if feature.type != "CDS":
                continue
            try:
                locus_tags = feature.qualifiers["locus_tag"]
            except KeyError:
                # Case of pseudogenes, which are CDS but have no protein ID.
                continue

            # Alternate the two colours on the current size of the set
            # (which also counts the link anchors added above).
            color = color1 if len(gd_feature_set) % 2 == 0 else color2

            if any(target in locus_tags for target in target_protein_list):
                color = colors.red

            gd_feature_set.add_feature(feature,
                                       sigil="ARROW",
                                       color=color,
                                       label=True,
                                       label_position="middle",
                                       label_strand=1,
                                       label_size=12,
                                       label_angle=45)

    # hauteur = height, largeur = width.
    if len(region_record_list) == 2:
        hauteur = 700
    else:
        hauteur = 250 * len(region_record_list)
    largeur = max(record_length) / 30
    # NOTE(review): pagesize is passed as (hauteur, largeur), i.e. the height
    # value first -- kept as in the original, confirm against the renderer.
    orientation = 'portrait' if hauteur > largeur else 'landscape'
    gd_diagram.draw(format="linear",
                    pagesize=(hauteur, largeur),
                    orientation=orientation,
                    fragments=1,
                    start=0,
                    end=max_len)

    gd_diagram.write(out_name, "SVG")
                track_x_name = name_for_featureset.name
                track_x = name_for_featureset
            if name_for_featureset[feature_number].name == cross_link_feature_B and name_for_featureset.name != str(track_x_name):
                feature_y = name_for_featureset[feature_number]
                track_y_name = name_for_featureset.name
                track_y = name_for_featureset
                color = colors.linearlyInterpolatedColor(colors.white, colors.firebrick,
                                                 0, 100, score)

                border = colors.lightgrey
                # link_xy = CrossLink((track_x, feature_x.location.start, feature_x.location.end),
                #             (track_y, feature_y.location.start, feature_y.location.end),
                #             color, colors.lightgrey)
                # gd_diagram.cross_track_links.append(link_xy)
                
                gd_diagram.cross_track_links.append(CrossLink(feature_x, feature_y, color, border))
                print('MATCH')

    i += 1
    #print (len(name_for_featureset))
    #for score, nameA, nameB in A_vs_B:
        
#if name_for_featureset.name == nameA:
#            print(score)
#            print(nameA)

#print(name_for_featureset.features[4].name)

#for score, x, y in A_vs_B:
#color = colors.linearlyInterpolatedColor(colors.white, colors.firebrick, 0, 100, 50)
#    border = colors.lightgrey 	
Exemplo n.º 11
0
def plot_multiple_regions_crosslink(target_protein_list,
                                    region_record_list,
                                    plasmid_list,
                                    out_name,
                                    biodb_name="chlamydia_03_15",
                                    color_locus_list=[],
                                    flip_record_based_on_first=True,
                                    color_orthogroup_list=[]):

    import matplotlib.cm as cm
    from matplotlib.colors import rgb2hex
    import matplotlib as mpl
    import MySQLdb
    import os
    sqlpsw = os.environ['SQLPSW']

    norm = mpl.colors.Normalize(vmin=-30, vmax=100)
    cmap = cm.Blues
    m = cm.ScalarMappable(norm=norm, cmap=cmap)

    conn = MySQLdb.connect(
        host="127.0.0.1",  # your host, usually localhost
        user="******",  # your username
        passwd=sqlpsw,  # your password
        db="orth_%s" % biodb_name)  # name of the data base
    cursor = conn.cursor()

    gd_diagram = GenomeDiagram.Diagram("geomic_region")
    feature_sets = []
    max_len = 0
    records = dict((rec.name, rec) for rec in region_record_list)

    n_records = len(region_record_list)

    record_length = [len(record) for record in region_record_list]

    if flip_record_based_on_first:
        region_record_list_flip = [region_record_list[0]]
        region_record_list_flip[0].name = region_record_list_flip[
            0].description
        for x in range(0, len(region_record_list) - 1):
            same_strand_count = 0
            different_strand_count = 0
            features_X = region_record_list[x].features
            features_Y = region_record_list[x + 1].features
            for feature_1 in features_X:

                if feature_1.type != "CDS":
                    continue

                for feature_2 in features_Y:
                    if feature_2.type != "CDS":
                        continue
                    try:

                        group1 = feature_1.qualifiers["orthogroup"][0]
                        group2 = feature_2.qualifiers["orthogroup"][0]
                        if group1 == group2:
                            strand1 = feature_1.location.strand
                            strand2 = feature_2.location.strand
                            if strand1 == strand2:
                                same_strand_count += 1
                            else:
                                different_strand_count += 1

                    except:
                        pass

            if different_strand_count > same_strand_count:
                region_record_list[x + 1] = region_record_list[
                    x + 1].reverse_complement(
                        id=region_record_list[x + 1].id,
                        name=region_record_list[x + 1].description)
            else:
                region_record_list[x +
                                   1].name = region_record_list[x +
                                                                1].description

        #region_record_list = region_record_list_flip
    for i, record in enumerate(region_record_list):
        max_len = max(max_len, len(record))
        #Allocate consecutive track levels: n_records - 1 for the first
        #record down to 0 for the last, with no empty levels in between.
        gd_track_for_features = gd_diagram.new_track(
            (1 * n_records - 1) - 1 * i,
            name=record.name,
            greytrack=True,
            height=0.4,
            start=0,
            end=len(record))
        if record.name not in feature_sets:
            feature_sets.append(gd_track_for_features.new_set())
        else:
            print("already in feature_sets!")
            print(record)
            quit

    #print 'looping....'
    for x in range(0, len(region_record_list) - 1):
        features_X = region_record_list[x].features
        features_Y = region_record_list[x + 1].features
        set_X = feature_sets[x]
        set_Y = feature_sets[x + 1]
        for feature_1 in features_X:

            if feature_1.type != "CDS":
                continue

            for feature_2 in features_Y:
                if feature_2.type != "CDS":
                    continue
                try:

                    group1 = feature_1.qualifiers["orthogroup"][0]
                    group2 = feature_2.qualifiers["orthogroup"][0]

                except:
                    group1 = "one_singleton"
                    group2 = "two_singleton"

                if group1 == group2:
                    border = colors.lightgrey
                    color = colors.lightgrey
                    try:
                        identity = orthogroup_identity_db.check_identity(
                            cursor, feature_1.qualifiers["orthogroup"][0],
                            feature_1.qualifiers["locus_tag"][0],
                            feature_2.qualifiers["locus_tag"][0])
                    except:
                        identity = 0
                        print(
                            "problem with identity table %s and locus %s %s" %
                            (group1, feature_1.qualifiers["locus_tag"][0],
                             feature_1.qualifiers["locus_tag"][0]))

                    color2 = colors.HexColor(
                        rgb2hex(m.to_rgba(float(identity))))
                    border2 = colors.HexColor(
                        rgb2hex(m.to_rgba(float(identity))))

                    F_x = set_X.add_feature(
                        SeqFeature(
                            FeatureLocation(feature_1.location.start,
                                            feature_1.location.end,
                                            strand=0)),
                        color=color,
                        border=border,
                        set_id=feature_1.qualifiers["locus_tag"])
                    F_y = set_Y.add_feature(SeqFeature(
                        FeatureLocation(feature_2.location.start,
                                        feature_2.location.end,
                                        strand=0)),
                                            color=color,
                                            border=border)
                    gd_diagram.cross_track_links.append(
                        CrossLink(F_x, F_y, color2, border2))

    #for x in range(0,len(region_record_list)-1):
    x = 0
    all_locus = []

    for n, record in enumerate(region_record_list):
        gd_feature_set = feature_sets[n]
        i = 0

        if plasmid_list[x]:
            #print "PLASMID!!"
            color1 = colors.HexColor('#2837B7')
            color2 = colors.blue
        else:
            color1 = colors.HexColor('#40F13A')
            color2 = colors.HexColor('#0F600C')

        one_row_locus = []
        for feature in record.features:
            if feature.type == "tblast_target":
                feature.name = 'match'
                gd_feature_set.add_feature(feature,
                                           sigil="BOX",
                                           color="#ff4a0c86",
                                           label=False,
                                           label_position="middle",
                                           label_size=25,
                                           label_angle=0)

            if feature.type == "assembly_gap":
                #print "gap", feature
                feature.location.strand = None
                gd_feature_set.add_feature(feature,
                                           sigil="BOX",
                                           color="red",
                                           label=True,
                                           label_position="middle",
                                           label_strand=1,
                                           label_size=14,
                                           label_angle=40)

            if feature.type == "rRNA":

                gd_feature_set.add_feature(feature,
                                           sigil="ARROW",
                                           color="orange",
                                           label=True,
                                           label_position="middle",
                                           label_strand=1,
                                           label_size=10,
                                           label_angle=40)
                try:
                    one_row_locus.append(feature.qualifiers["locus_tag"][0])
                except:
                    pass
            if feature.type == "tRNA":

                gd_feature_set.add_feature(feature,
                                           sigil="ARROW",
                                           color="orange",
                                           label=True,
                                           label_position="middle",
                                           label_strand=1,
                                           label_size=10,
                                           label_angle=40)
                try:
                    one_row_locus.append(feature.qualifiers["locus_tag"][0])
                except:
                    print('no locus tag for:')
                    print(feature)

            if feature.type == "repeat_region":

                gd_feature_set.add_feature(feature,
                                           sigil="BOX",
                                           color="blue",
                                           label=True,
                                           label_position="middle",
                                           label_strand=1,
                                           label_size=14,
                                           label_angle=40)

            if 'pseudo' in feature.qualifiers:

                gd_feature_set.add_feature(feature,
                                           sigil="OCTO",
                                           color="#6E6E6E",
                                           label=True,
                                           label_position="middle",
                                           label_strand=1,
                                           label_size=10,
                                           label_angle=40)

            elif feature.type != "CDS":
                continue
            else:

                try:
                    a = feature.qualifiers["locus_tag"][0]
                except:
                    # cas des pseudogenes qui sont des CDS mais n'ont pas de protein ID
                    continue

                try:
                    g = feature.qualifiers["orthogroup"][0]
                except:
                    # cas des pseudogenes qui sont des CDS mais n'ont pas de protein ID
                    continue

                if a in color_locus_list:
                    #print '###########################', a, color_locus_list
                    if len(gd_feature_set) % 2 == 0:
                        color = colors.HexColor('#ca4700')
                    else:
                        color = colors.HexColor('#fd7a32')
                else:
                    if len(gd_feature_set) % 2 == 0:
                        color = color1
                    else:
                        color = color2

                if g in color_orthogroup_list:
                    #print '###########################', a, color_locus_list
                    if len(gd_feature_set) % 2 == 0:
                        color = colors.HexColor('#ca4700')
                    else:
                        color = colors.HexColor('#fd7a32')
                else:
                    if len(gd_feature_set) % 2 == 0:
                        color = color1
                    else:
                        color = color2

                #try:
                #    try:
                #            group = protein_id2group[feature.qualifiers["protein_id"][0]]
                #    except:
                #            group = protein_id2group[feature.qualifiers["protein_id"][1]]
                #except:
                #    # no group attributed: singleton => special color
                #    color = colors.HexColor('#E104C0')

                for target_protein in target_protein_list:
                    if target_protein in feature.qualifiers["locus_tag"]:
                        #print "target prot!"
                        color = colors.red

                gd_feature_set.add_feature(feature,
                                           sigil="ARROW",
                                           color=color,
                                           label=True,
                                           label_position="middle",
                                           label_strand=1,
                                           label_size=10,
                                           label_angle=40)
                i += 1
                try:
                    one_row_locus.append(feature.qualifiers["locus_tag"][0])
                except:
                    print('no locus tag for:')
                    print(feature)
        all_locus = one_row_locus + all_locus

        x += 1

    #print "max", max_len
    #print "n record", len(region_record_list)

    if len(region_record_list) == 2:
        hauteur = 300
    else:
        hauteur = 150 * len(region_record_list)
    largeur = max(record_length) / 30
    #print "hauteur", hauteur
    #print "largeur", largeur
    #gd_diagram.set_page_size(, orientation)
    if hauteur > largeur:
        gd_diagram.draw(format="linear",
                        pagesize=(hauteur, largeur),
                        orientation='portrait',
                        fragments=1,
                        start=0,
                        end=max_len)
    else:
        gd_diagram.draw(format="linear",
                        pagesize=(hauteur, largeur),
                        orientation='landscape',
                        fragments=1,
                        start=0,
                        end=max_len)
    #print "writing diagram", out_name

    #gd_diagram.write(out_name, "SVG")

    import io
    from chlamdb.plots import edit_svg

    svg_diagram = io.StringIO()
    gd_diagram.write(svg_diagram, "SVG")
    svg_diagram.flush()
    #gd_diagram

    with_links = edit_svg.edit_svg(svg_diagram.getvalue(), all_locus,
                                   biodb_name)

    with_links.write(out_name)

    png_name = out_name.split('.')[0] + '.png'

    #png_handle = open(png_name, 'w')
    #gd_diagram.write(png_handle, "PNG")
    #png_handle.close()

    try:
        cmd = 'chmod 444 %s' % out_name
    except:
        pass
    from chlamdb.biosqldb import shell_command
    #print cmd
    shell_command.shell_command(cmd)

    return all_locus
            if name_for_featureset[
                    feature_number].name == cross_link_feature_B and name_for_featureset.name != str(
                        track_x_name):
                features_y_link.insert(number,
                                       name_for_featureset[feature_number])
                #feature_y_n = name_for_featureset[feature_number]
                track_y_name = name_for_featureset.name
                track_y = name_for_featureset

                print("Test FEATUREA 2: ")
                print(number)
                print(name_for_featureset[feature_number].name)
                print(features_y_link)

    for feature_x, feature_y in zip(features_x_link, features_y_link):
        #print("SCORE: " + str(count_feature))
        score = A_vs_B[count_feature][0]
        color = colors.linearlyInterpolatedColor(colors.white,
                                                 colors.firebrick, 0, 100,
                                                 score)
        border = colors.lightgrey
        gd_diagram.cross_track_links.append(
            CrossLink(feature_x, feature_y, color, border))
        count_feature += 1

    i += 1

# Lay out the finished diagram as a single linear fragment on an A4 page,
# starting at position 0 (no explicit end is passed).
gd_diagram.draw(format='linear', pagesize='A4', fragments=1, start=0)

# Export the same drawing twice, as PDF and as SVG, next to each other
# under the common diagram_name prefix.
gd_diagram.write(diagram_name + ".pdf", "pdf")
gd_diagram.write(diagram_name + ".svg", "SVG")
Exemplo n.º 13
0
                print fname

# Fill and outline shared by every cross link drawn below.
color = colors.Color(0.8, 0.8, 0.8, 0.9)
border = colors.lightgrey

# Each usable line of ch_comp.out holds at least two whitespace-separated
# ids; a link is drawn only when both ids have a bar on their track.
with open("ch_comp.out") as f:
    for line in f:
        ls = line.split()
        if len(ls) < 2:
            # Blank or truncated line: nothing to link.
            continue
        # The first seven characters of the first column are dropped
        # before the id is looked up.
        id1 = ls[0][7:]
        if id1 in rnconv:
            id1 = rnconv[id1]
        id2 = ls[1]
        print(id1, id2)
        if id1 in rbars1 and id2 in rbars2:
            gd_diagram.cross_track_links.append(
                CrossLink(rbars1[id1], rbars2[id2], color, border))

# The rrs and rrl bars are always linked when present on both tracks.
for key1, key2 in [("rrs", "rrs"), ("rrl", "rrl")]:
    if key1 in rbars1 and key2 in rbars2:
        gd_diagram.cross_track_links.append(
            CrossLink(rbars1[key1], rbars2[key2], color, border))

# Was the Python 2 statement `print border`, a SyntaxError on Python 3.
print(border)

gd_diagram.draw(format="linear", pagesize='A4', fragments=1)

#gd_diagram.write(name + ".pdf", "PDF")
gd_diagram.write(name + "_linear.png", "PNG")
gd_diagram.write(name + "_linear.eps", "EPS")
#gd_diagram.write(name + ".svg", "SVG")
Exemplo n.º 14
0
                    sys_exit(
                        "Could not find offset key %r for hit %r in dict (query id %r)"
                        % (hack_ncbi_fasta_name(
                            hsp.hit_id), hsp.hit_id, hsp.query_id))
                else:
                    sys_exit(
                        "Could not find offset for hit %r in dict (query id %r)"
                        % (hsp.hit_id, hsp.query_id))
            loc = FeatureLocation(r_offset + hsp.hit_start,
                                  r_offset + hsp.hit_end,
                                  strand=0)
            hit = gd_ref_features.add_feature(SeqFeature(loc),
                                              color=color,
                                              border=border)
            gd_diagram.cross_track_links.append(
                CrossLink(query, hit, color, border, flip))

        offset += SPACER + contig_len

    # Ready for next pairwise comparison,
    reference_fasta = assembly_fasta
    ref_offsets = contig_offsets
    gd_ref_features = gd_contig_features

# Set size based on max track length?
# First page dimension: a fixed 2 cm plus 5 cm per entry in assemblies_fasta.
# Second page dimension: proportional to max_len, 100 cm for every
# 5,000,000 units of max_len.
page = (2 * cm + 5 * cm * len(assemblies_fasta), 100 * cm * max_len / 5000000)
# Render everything as one linear fragment covering 0..max_len on that page.
gd_diagram.draw(format="linear",
                fragments=1,
                pagesize=page,
                start=0,
                end=max_len)
Exemplo n.º 15
0
def GenomeMap(file, GenomeId, grid=10000, cross=True):
    """Draw a linear GenomeDiagram for several GenBank records.

    Args:
        file: Path of a CSV file describing cross links. For each row,
            columns 0 and 1 hold the two record ids (only the text before
            the first space is used) and columns 2-5 hold the integer
            start/end on the first record and start/end on the second.
        GenomeId: Sequence of record identifiers. ``<Id>.gb`` is read from
            the working directory; when that fails the record is fetched
            through Entrez and written to ``<Id>.gb``.
        grid: Interval between large scale ticks; small ticks are placed
            every ``grid / 20``.
        cross: When true, cross links are drawn and the PDF is written to
            the name ``output``, which this function does not define and
            must exist in the enclosing module. When false, no links are
            drawn and the result is written to ``T7.pdf``.

    Raises:
        KeyError: If a cross-link id, or ``record.id``, has no matching
            track or keyword entry.
    """
    gd_diagram = GenomeDiagram.Diagram('phages')
    # newline='' is what the csv module asks for when reading files.
    with open(file, 'r', newline='') as f:
        data = list(csv.reader(f))

    records = []
    ref = {}
    for Id in GenomeId:
        gb_path = Id + ".gb"
        try:
            record = SeqIO.read(gb_path, "genbank")
        except (OSError, ValueError):
            # The original ``except A or B or C`` only caught the first
            # class; a tuple is needed so an unreadable or malformed cache
            # file also falls back to downloading the record.
            hd = Entrez.efetch(db="nucleotide",
                               id=Id,
                               rettype='gb',
                               retmode="text")
            try:
                record = SeqIO.read(hd, 'genbank')
            finally:
                hd.close()
            with open(gb_path, 'w') as fw:
                SeqIO.write(record, fw, 'genbank')
        # Keywords are taken from the on-disk copy; they decide further
        # down whether "gene" or "CDS" features are drawn.
        for cached in SeqIO.parse(gb_path, "genbank"):
            ref[Id] = cached.annotations['keywords']
        records.append(record)

    feature_sets = {}
    max_len = 0
    for i, record in enumerate(records):
        max_len = max(max_len, len(record))
        gd_track_for_features = gd_diagram.new_track(
            5 - 2 * i,
            name=record.description,
            greytrack=True,
            greytrack_fontsize=16,
            greytrack_labels=1,
            largetick=True,
            smalltick=True,
            scale_ticks=True,
            scale_largeticks=0.5,
            scale_smallticks=0.1,
            scale_largetick_interval=grid,
            scale_smalltick_interval=grid / 20,
            scale_largetick_labels=True,
            start=0,
            end=len(record),
        )
        # The dict is keyed by record.id, so the duplicate check has to use
        # record.id as well (it used record.name before and could not fire
        # whenever name and id differ).
        assert record.id not in feature_sets
        feature_sets[record.id] = gd_track_for_features.new_set()

    if cross:
        genome_count = len(GenomeId)
        for crosslink in data:
            id_x = crosslink[0].split(' ')[0]
            id_y = crosslink[1].split(' ')[0]
            set_X = feature_sets[id_x]
            set_Y = feature_sets[id_y]
            # Colour ramp depends on whether both ends belong to CLASS1,
            # both to CLASS2, or neither; the position on the ramp is the
            # index of the second record within GenomeId.
            if id_x in CLASS1 and id_y in CLASS1:
                ramp_start, ramp_end = colors.green, colors.yellow
            elif id_x in CLASS2 and id_y in CLASS2:
                ramp_start, ramp_end = colors.purple, colors.red
            else:
                ramp_start, ramp_end = colors.blue, colors.cyan
            color = colors.linearlyInterpolatedColor(
                ramp_start, ramp_end, 0, genome_count, GenomeId.index(id_y))
            F_x = set_X.add_feature(
                SeqFeature(
                    FeatureLocation(int(crosslink[2]), int(crosslink[3]),
                                    strand=0)),
                color=color,
                border=color,
            )
            F_y = set_Y.add_feature(
                SeqFeature(
                    FeatureLocation(int(crosslink[4]), int(crosslink[5]),
                                    strand=0)),
                color=color,
                border=color,
            )
            gd_diagram.cross_track_links.append(
                CrossLink(F_x, F_y, color, color))

    # Built once instead of once per feature.
    named_colors = list(colors.getAllNamedColors().keys())
    # NOTE(review): 163 is kept from the original code; it presumably
    # matches the number of named colours available - confirm.
    palette_size = 163

    for record in records:
        gd_feature_set = feature_sets[record.id]

        # Origin correction: rotate the record so that it starts at the
        # first rep_origin feature, if there is one.
        for feature in record.features:
            if feature.type == 'rep_origin':
                print(record.description + ' 的起始位点在:' +
                      str(feature.location.start))
                record = record[feature.location.
                                start:] + record[:feature.location.start]
                if record.features[0].strand == -1:
                    print('daole')
                    record = record.reverse_complement(id=True,
                                                       name=True,
                                                       description=True,
                                                       features=True,
                                                       annotations=True,
                                                       letter_annotations=True)
                break
        # To draw the reverse-complement sequence, enable this by hand:
        # record = record.reverse_complement(id=True, name=True, description=True, features=True,
        #                                    annotations=True, letter_annotations=True)

        print(record.description + ' 的起始位点已校正')

        # Draw the features: records with non-empty keywords show their
        # "gene" features, the others their "CDS" features with a longer
        # arrow head.
        if ref[record.id] != ['']:
            wanted_type, arrowhead = "gene", 0.1
        else:
            wanted_type, arrowhead = "CDS", 0.2
        for feature in record.features:
            if feature.type != wanted_type:
                continue
            # Colour is picked from the named-colour list by feature length.
            color = named_colors[len(feature) % palette_size]
            gd_feature_set.add_feature(feature,
                                       color=color,
                                       label=True,
                                       label_size=10,
                                       label_angle=90,
                                       sigil="ARROW",
                                       arrowshaft_height=1.0,
                                       arrowhead_length=arrowhead)

    if not cross:
        # Single-sequence drawing.
        gd_diagram.draw(format="linear",
                        pagesize='A4',
                        fragments=5,
                        start=0,
                        end=max_len,
                        fragment_size=1)
        gd_diagram.write("T7.pdf", "PDF")
    else:
        # Comparison drawing, page width scaled by the number of genomes.
        gd_diagram.draw(format="linear",
                        pagesize=(10 * len(GenomeId) * cm, 120 * cm),
                        fragments=1,
                        start=0,
                        end=max_len,
                        fragment_size=1)
        gd_diagram.write(output, "PDF")

    print("已输出为PDF")
    set_Y = feature_sets[Y]
    for score, x, y in X_vs_Y:
        color = colors.linearlyInterpolatedColor(colors.white, colors.firebrick,
                                                 0, 100, score)
        border = colors.lightgrey
        f_x = get_feature(features_X, x)
        F_x = set_X.add_feature(SeqFeature(FeatureLocation(f_x.location.start,
                                                           f_x.location.end,
                                                           strand=0)),
                                color=color, border=border)
        f_y = get_feature(features_Y, y)
        F_y = set_Y.add_feature(SeqFeature(FeatureLocation(f_y.location.start,
                                                           f_y.location.end,
                                                           strand=0)),
                                color=color, border=border)
        gd_diagram.cross_track_links.append(CrossLink(F_x, F_y, color, border))


for record, gene_colors in zip([A_rec, B_rec, C_rec],
                               [A_colors, B_colors, C_colors]):
    gd_feature_set = feature_sets[record.name]

    i = 0
    for feature in record.features:
        if feature.type != "gene":
            # Exclude this feature
            continue
        try:
            g_color = gene_colors[i]
        except IndexError:
            print("Don't have color for %s gene %i" % (record.name, i))
Exemplo n.º 17
0
        identity = float(each_line_split[2])
        query_start = int(each_line_split[6])
        query_end = int(each_line_split[7])
        target_start = int(each_line_split[8])
        target_end = int(each_line_split[9])

        # use color to reflect identity
        color = colors.linearlyInterpolatedColor(colors.white, colors.red, 50,
                                                 100, identity)

        # determine which is which (query/target to contig_1/contig_2)
        # if query is contig_1
        if query == gene1_contig.name:
            link = CrossLink(
                (contig_1_gene_content_track, query_start, query_end),
                (contig_2_gene_content_track, target_start, target_end),
                color=color,
                border=color,
                flip=False)
            diagram.cross_track_links.append(link)

        # if query is contig_2
        elif query == gene2_contig.name:
            link = CrossLink(
                (contig_2_gene_content_track, query_start, query_end),
                (contig_1_gene_content_track, target_start, target_end),
                color=color,
                border=color,
                flip=False)
            diagram.cross_track_links.append(link)

    # Draw and Export
Exemplo n.º 18
0
    def test_diagram_via_object_pdf(self):
        """Assemble a diagram from explicit objects and render it to PDF files."""
        record = self.record
        diagram = Diagram('Test Diagram')

        cds_track = Track('CDS features',
                          greytrack=True,
                          scale_largetick_interval=1e4,
                          scale_smalltick_interval=1e3,
                          greytrack_labels=10,
                          greytrack_font_color="red",
                          scale_format="SInt")
        gene_track = Track('gene features',
                           greytrack=1,
                           scale_largetick_interval=1e4)

        # Background sets carry the cross-link anchors; the numbered sets
        # of the original are named after what they hold here.
        cds_backdrop = FeatureSet(name='CDS backgrounds')
        gene_backdrop = FeatureSet(name='gene background')

        cds_set = FeatureSet(name='CDS features')
        gene_set = FeatureSet(name='gene features')
        misc_set = FeatureSet(name='misc_features')
        repeat_set = FeatureSet(name='repeat regions')

        # Pair every CDS with the gene feature seen just before it and join
        # their backgrounds with a cross link; colours alternate with the
        # parity of the running CDS count.
        pending_gene = None
        cds_seen = 0
        for feat in record.features:
            if feat.type == 'CDS':
                cds_seen += 1
                if pending_gene:
                    if cds_seen % 2:
                        dark, light = colors.burlywood, colors.bisque
                    else:
                        dark, light = colors.peru, colors.tan
                    cds_span = FeatureLocation(feat.location.start,
                                               feat.location.end,
                                               strand=0)
                    cds_anchor = cds_backdrop.add_feature(
                        SeqFeature(cds_span), color=dark)
                    gene_span = FeatureLocation(pending_gene.location.start,
                                                pending_gene.location.end,
                                                strand=0)
                    gene_anchor = gene_backdrop.add_feature(
                        SeqFeature(gene_span), color=dark)
                    diagram.cross_track_links.append(
                        CrossLink(cds_anchor, gene_anchor, light, dark))
                    pending_gene = None
            elif feat.type == 'gene':
                pending_gene = feat

        # Hand-placed links: (base colour, span on CDS track, span on gene
        # track, flip). Order matters because later links are drawn on top.
        manual_links = (
            # Both ends on the same linear diagram fragment.
            (colors.red, (2220, 2230), (2200, 2210), False),
            (colors.blue, (2150, 2200), (2220, 2290), True),
            (colors.green, (2250, 2560), (2300, 2860), False),
            # Both ends straddling a fragment boundary; the second one is
            # nestled inside the first.
            (colors.red, (3155, 3250), (3130, 3300), False),
            (colors.blue, (3160, 3275), (3180, 3225), True),
            # Ends on opposite sides of a fragment boundary.
            (colors.green, (6450, 6550), (6265, 6365), False),
            (colors.gold, (6265, 6365), (6450, 6550), False),
            (colors.red, (6275, 6375), (6430, 6530), True),
            (colors.blue, (6430, 6530), (6275, 6375), True),
        )
        for base_color, cds_range, gene_range, flipped in manual_links:
            fill, edge = fill_and_border(base_color)
            cds_anchor = cds_backdrop.add_feature(
                SeqFeature(FeatureLocation(*cds_range)),
                color=fill,
                border=edge)
            gene_anchor = gene_backdrop.add_feature(
                SeqFeature(FeatureLocation(*gene_range)),
                color=fill,
                border=edge)
            diagram.cross_track_links.append(
                CrossLink(cds_anchor,
                          gene_anchor,
                          color=fill,
                          border=edge,
                          flip=flipped))

        # Populate the visible feature sets by feature type.
        cds_seen = 0
        for feat in record.features:
            kind = feat.type
            if kind == 'CDS':
                cds_seen += 1
                arrow_color = colors.red if cds_seen % 2 else colors.pink
                cds_set.add_feature(feat, color=arrow_color, sigil="ARROW")
            elif kind == 'gene':
                # Gene colour is overridden for the whole set further down.
                gene_set.add_feature(feat, sigil="ARROW")
            elif kind == 'misc_feature':
                misc_set.add_feature(feat, color=colors.orange)
            elif kind == 'repeat_region':
                repeat_set.add_feature(feat, color=colors.purple)

        for labelled in (cds_set, gene_set, misc_set, repeat_set):
            labelled.set_all_features('label', 1)
        for shown in (misc_set, repeat_set):
            shown.set_all_features('hide', 0)
        gene_set.set_all_features('color', colors.blue)

        # Backgrounds are added first so they end up underneath.
        cds_track.add_set(cds_backdrop)
        cds_track.add_set(cds_set)
        gene_track.add_set(gene_backdrop)
        gene_track.add_set(gene_set)

        misc_track = Track('misc features and repeats',
                           greytrack=1,
                           scale_largetick_interval=1e4)
        misc_track.add_set(misc_set)
        misc_track.add_set(repeat_set)

        # Graph sets. A fairly coarse window keeps bar and line graphs
        # easy to tell apart.
        window = len(record) // 200
        skew_graphs = GraphSet('GC skew')
        skew_values = apply_to_window(record.seq, window, calc_gc_skew,
                                      window)
        skew_graphs.new_graph(skew_values,
                              'GC Skew',
                              style='bar',
                              color=colors.violet,
                              altcolor=colors.purple)
        skew_track = Track('GC Skew (bar)',
                           height=1.94,
                           greytrack=1,
                           scale_largetick_interval=1e4)
        skew_track.add_set(skew_graphs)

        content_graphs = GraphSet('GC and AT Content')
        gc_values = apply_to_window(record.seq, window, calc_gc_content,
                                    window)
        content_graphs.new_graph(gc_values,
                                 'GC content',
                                 style='line',
                                 color=colors.lightgreen,
                                 altcolor=colors.darkseagreen)
        at_values = apply_to_window(record.seq, window, calc_at_content,
                                    window)
        content_graphs.new_graph(at_values,
                                 'AT content',
                                 style='line',
                                 color=colors.orange,
                                 altcolor=colors.red)
        content_track = Track('GC Content(green line), AT Content(red line)',
                              height=1.94,
                              greytrack=1,
                              scale_largetick_interval=1e4)
        content_track.add_set(content_graphs)

        dinuc_graphs = GraphSet('Di-nucleotide count')
        window = len(record) // 400  # finer window for the heat map
        dinuc_values = apply_to_window(record.seq, window,
                                       calc_dinucleotide_counts, window)
        dinuc_graphs.new_graph(dinuc_values,
                               'Di-nucleotide count',
                               style='heat',
                               color=colors.red,
                               altcolor=colors.orange)
        dinuc_track = Track('Di-nucleotide count',
                            height=0.5,
                            greytrack=False,
                            scale=False)
        dinuc_track.add_set(dinuc_graphs)

        # Tracks go on levels 3..8, leaving the lowest levels empty.
        stacking = (
            skew_track,  # GC skew
            content_track,  # GC and AT content
            cds_track,  # CDS features
            gene_track,  # Gene features
            misc_track,  # Misc features and repeat feature
            dinuc_track,  # Feature depth
        )
        for level, track in enumerate(stacking, start=3):
            diagram.add_track(track, level)

        def render(pdf_name, **draw_options):
            """Draw the diagram with the given options and save it as PDF."""
            diagram.draw(**draw_options)
            diagram.write(os.path.join('Graphics', pdf_name), 'PDF')

        # Circular: whole record, then a 3000-6300 window.
        render('GD_by_obj_circular.pdf',
               format='circular',
               orientation='landscape',
               tracklines=0,
               pagesize='A0')

        diagram.circular = False
        render('GD_by_obj_frag_circular.pdf',
               format='circular',
               orientation='landscape',
               tracklines=0,
               pagesize='A0',
               start=3000,
               end=6300)

        # Linear: whole record in three fragments, then the same window
        # on a custom page with fewer grey-track labels.
        render('GD_by_obj_linear.pdf',
               format='linear',
               orientation='landscape',
               tracklines=0,
               pagesize='A0',
               fragments=3)

        diagram.set_all_tracks("greytrack_labels", 2)
        render('GD_by_obj_frag_linear.pdf',
               format='linear',
               orientation='landscape',
               tracklines=0,
               pagesize=(30 * cm, 10 * cm),
               fragments=1,
               start=3000,
               end=6300)
Exemplo n.º 19
0
    if i.type == "rRNA":
        color_atual = colors.blue
        gB1.add_feature(i,
                        label=False,
                        label_position="start",
                        color=color_atual)

# Mark the syntenic stretches on the figure: one cross link per BLAST row.
# Each row is tab-separated: query start, query end, subject start,
# subject end, percent identity.
for b in blast:
    fields = b.split("\t")
    qstart = int(fields[0])
    qend = int(fields[1])
    sstart = int(fields[2])
    send = int(fields[3])
    # Percent identity rescaled to 0..0.8 and used as the alpha channel.
    identidade = float(fields[4]) * 0.8 / 100

    # Detect inversions: the two spans run in the same direction exactly
    # when their signed lengths have the same non-zero sign.
    qinv = qend - qstart
    sinv = send - sstart

    if qinv * sinv > 0:
        cor = colors.Color(1, .341176, .341176, identidade)
    else:
        cor = colors.firebrick

    link = CrossLink((gA, qstart, qend), (gB, sstart, send), color=cor)
    gd.cross_track_links.append(link)

gd.draw(format="linear", pagesize=(8 * cm, 29.7 * cm), fragments=1)

gd.write(name + ".pdf", "PDF")