Exemplo n.º 1
0
 def run_hmmtop(self):
     """Run HMMTOP on the master sequence file and tally TMS midpoints.

     Stores the raw HMMTOP results in ``self.tms``, the midpoint of every
     predicted segment (after ``self.get_pos`` translation) in
     ``self.hmmtop``, and for every column ``1..len(self.master_seq[0])``
     the fraction ``hits / len(self.master_seq)`` in ``self.tms_ratio``.

     ``self.tms_ratio`` must already exist as a mapping; it is updated in
     place. The gapped sequences are read from the relative path
     ``'hmmtop.in'`` (presumably produced by the HMMTOP wrapper -- confirm).

     Returns:
         True, always.
     """
     ht = hmmtop.tools()
     ht.add_library('res', self.master_seq_file)
     ht.scan_libraries()
     self.tms = ht.results
     self.hmmtop = []
     # Sequence strings keyed by FASTA record id.
     gap_seqs = {}
     for record in SeqIO.parse('hmmtop.in', 'fasta'):
         gap_seqs[record.id] = str(record.seq)
     for seq_id, segments in ht.results['res'].items():
         for segment in segments.values():
             # get_pos presumably maps a residue index onto the gapped
             # sequence -- confirm against its definition.
             start = self.get_pos(segment[0], gap_seqs[seq_id])
             end = self.get_pos(segment[1], gap_seqs[seq_id])
             self.hmmtop.append(median(range(start, end + 1)))
     # One counter per column, 1-based.
     graph = dict.fromkeys(range(1, len(self.master_seq[0]) + 1), 0)
     for midpoint in self.hmmtop:
         # Count the hit at the midpoint's own column. The previous code
         # incremented a stale index left over from the loop above, so all
         # hits piled up on the last column. A median over an even-length
         # range is fractional, so truncate it to a column number.
         column = int(midpoint)
         if column in graph:
             graph[column] += 1
     total = Decimal(len(self.master_seq))
     for key, hits in graph.items():
         self.tms_ratio[key] = Decimal(hits) / total
     return True
Exemplo n.º 2
0
 def hmmtop(self):
     """Load or compute HMMTOP predictions and store them in ``self.tms``.

     When ``self.debug`` is truthy and ``<indir>/hmmtop.db`` exists, the
     pickled results are loaded from that file and HMMTOP is not run.
     Otherwise HMMTOP is run on ``<indir>/myqueries.faa`` (library
     ``'queries'``) and ``<indir>/mytcdb.faa`` (library ``'tcdb'``), and the
     results are pickled to ``<indir>/hmmtop.db`` before being stored.

     Returns:
         None.
     """
     db = self.indir + '/hmmtop.db'
     if os.path.exists(db) and self.debug:
         # The cache is written in binary mode below, so it must be read in
         # binary mode too; the context manager closes the handle promptly.
         # NOTE: pickle.load executes arbitrary code from the file -- only
         # use this cache on directories you trust.
         with open(db, 'rb') as cache:
             self.tms = pickle.load(cache)
         return
     ht = hmmtop.tools()
     ht.add_library('queries', self.indir + "/myqueries.faa")  # Genome
     ht.add_library('tcdb', self.indir + "/mytcdb.faa")
     ht.scan_libraries()
     with open(db, 'wb') as cache:
         pickle.dump(ht.results, cache)
     self.tms = ht.results
     return
Exemplo n.º 3
0
 def __call__(self):
     """Run the full pipeline: search, HMMTOP, global alignments, report.

     Steps, in order:
       1. Create ``self.outdir`` if it is missing and run ``self.tssearch()``.
       2. If ``self.swap`` is set, exchange the subject/target file, name
          and restriction attributes.
       3. Default missing subject/target names to the file's base name.
       4. Write the sequences referenced by ``self.tssdata`` to
          ``subjects.faa`` / ``targets.faa`` in ``self.outdir`` and run
          HMMTOP on both, storing the results in ``self.tms``.
       5. Run the global alignments, clamp every ``gsat_score`` in
          ``self.globaldata`` to a non-negative int (0 when it cannot be
          converted) and sort by it, highest first.
       6. Generate the HTML report.
     """
     if not os.path.exists(self.outdir):
         os.mkdir(self.outdir)
     self.tssearch()

     # The subject is always the shorter side, so flip the two roles when
     # the search flagged that a swap is needed.
     if self.swap:
         self.subject_file, self.target_file = self.target_file, self.subject_file
         self.subject_name, self.target_name = self.target_name, self.subject_name
         self.srestrict, self.trestrict = self.trestrict, self.srestrict

     # Fall back to the file's base name when no display name was given.
     if self.subject_name is None:
         self.subject_name = self.subject_file.rsplit('/', 1)[-1]
     if self.target_name is None:
         self.target_name = self.target_file.rsplit('/', 1)[-1]

     # Collect the ids that actually took part in a hit.
     subject_ids = self.tssdata.keys()
     target_ids = list(set([
         hit['target_symbol']
         for hits in self.tssdata.values()
         for hit in hits
     ]))

     self.subjects_dict = SeqIO.to_dict(SeqIO.parse(self.subject_file, 'fasta'))
     self.targets_dict = SeqIO.to_dict(SeqIO.parse(self.target_file, 'fasta'))

     # Write only the selected records, then hand both files to HMMTOP.
     subjects_path = self.outdir + '/subjects.faa'
     targets_path = self.outdir + '/targets.faa'
     with open(subjects_path, 'wb+') as subjects_out:
         for subject_id in subject_ids:
             SeqIO.write(self.subjects_dict[subject_id], subjects_out, 'fasta')
     with open(targets_path, 'wb+') as targets_out:
         for target_id in target_ids:
             SeqIO.write(self.targets_dict[target_id], targets_out, 'fasta')

     predictor = hmmtop.tools()
     predictor.add_library('subjects', subjects_path)
     predictor.add_library('targets', targets_path)
     predictor.scan_libraries()
     self.tms = predictor.results

     # Run GSAT on all of the selected pairs.
     self.run_global_alignments()

     # First ordering uses the raw (float-convertible) score; the stable
     # second sort below then preserves it among equal integer scores.
     self.globaldata.sort(key=lambda row: float(row['gsat_score']),
                          reverse=True)
     for row in self.globaldata:
         try:
             score = max(int(row['gsat_score']), 0)
         except:
             score = 0
         row['gsat_score'] = score
     self.globaldata.sort(key=lambda row: int(row['gsat_score']),
                          reverse=True)

     # Generate HTML report
     self.generate_report()
Exemplo n.º 4
0
 def __call__(self, seq, gap):
     """Annotate a gapped alignment string with TMS information.

     Args:
         seq: Passed to ``SeqIO.read(seq, 'fasta')``; if that fails for any
             reason, ``seq`` itself is stored as ``self.seq``.
         gap: Stored as ``self.gap`` after conversion with ``str``.

     Returns:
         ``self.annotation`` after ``parse_gap()`` and ``build_label()``
         have run, or ``None`` when the TMS ranges for ``self.seq.id``
         could not be obtained.
     """
     self.annotate = []
     self.gap = str(gap)

     # Best effort: accept either something SeqIO can read or an object
     # that is used as the record directly.
     try:
         record = SeqIO.read(seq, 'fasta')
     except:
         record = seq
     self.seq = record

     try:
         if self.hmmtop is not False:
             # Predictions were supplied up front; just look this id up.
             ranges = self.hmmtop[self.seq.id]
         else:
             # No predictions supplied: run HMMTOP on the input now.
             predictor = hmmtop.tools()
             predictor.add_library("gap", seq)
             predictor.scan_libraries()
             ranges = predictor.results['gap'][self.seq.id]
         self.global_tms_ranges = ranges
     except:
         # Any failure here means there is nothing to annotate.
         return None

     self.parse_gap()
     self.build_label()
     return self.annotation
Exemplo n.º 5
0
 def __call__(self):
     """Run HMMTOP on subject and target, then build and report matrices.

     Stores the ``build_flanks`` output for each side in
     ``self.subject_loops`` / ``self.target_loops``, creates
     ``self.outdir`` if needed, writes the FASTA files, fills in default
     names and finally builds the target matrix, crosses it and constructs
     the report.
     """
     # HMMTOP comes first: everything below depends on its predictions.
     predictor = hmmtop.tools()
     predictor.add_library('subject', self.subject_file)
     predictor.add_library('target', self.target_file)
     predictor.scan_libraries()

     # Build the flanking/loop data from each side's predictions.
     self.subject_loops = self.build_flanks(
         self.subject_file, predictor.results['subject'])
     self.target_loops = self.build_flanks(
         self.target_file, predictor.results['target'])

     # Make sure the output directory exists before writing the FASTAs.
     if not os.path.exists(self.outdir):
         os.mkdir(self.outdir)
     self.write_fastas()

     # Default each unit's name to the base name of its written file
     # (mysubject_file / mytarget_file are presumably set by
     # write_fastas -- confirm).
     if self.subjectname is None:
         self.subjectname = self.mysubject_file.name.rsplit('/', 1)[-1]
     if self.targetname is None:
         self.targetname = self.mytarget_file.name.rsplit('/', 1)[-1]

     self.build_target_matrix()
     self.cross_matrix()
     self.construct_report()
Exemplo n.º 6
0
def calculate(fasta, label, out):
    """Plot how many sequences have 0, 1, 2, ... predicted TMSs.

    Runs HMMTOP on ``fasta``, counts the segments predicted for each
    sequence and saves a bar chart of the distribution as a PNG.

    Args:
        fasta: Path of the FASTA file to analyse.
        label: Title drawn on the plot.
        out: Destination passed to ``plt.savefig``.

    Returns:
        A ``(tabs, tms)`` tuple: ``tabs`` maps every TMS count from 0 up to
        the largest one observed to the number of sequences with that
        count, and ``tms`` is the HMMTOP result for the library.
    """
    #os.environ['BIOV_DEBUG'] = 'True'
    ht = hmmtop.tools()
    ht.add_library('c', fasta)
    ht.scan_libraries()
    tms = ht.results['c']
    # The number of records is the number of '>' header markers; the
    # context manager closes the handle instead of leaking it.
    with open(fasta, 'r') as handle:
        total = handle.read().count('>')
    # Sequences missing from the results had no prediction at all.
    zeros = total - len(tms)
    tabs = {}
    for segments in tms.values():
        count = len(segments)
        tabs[count] = tabs.get(count, 0) + 1
    # Add (rather than setdefault) so that the no-prediction count is not
    # dropped when some result entry already has zero segments.
    tabs[0] = tabs.get(0, 0) + zeros
    biggest = max(tabs)
    # Fill the gaps so every count from 0 to the maximum has an entry.
    for count in range(biggest):
        tabs.setdefault(count, 0)
    xlocations = na.array(range(biggest + 1))
    # Read the heights in key order: dict iteration order is not guaranteed
    # to match the 0..biggest x positions.
    heights = [tabs[count] for count in range(biggest + 1)]
    plt.xticks(xlocations + 0.3 / 2, xlocations)
    plt.bar(xlocations, heights, width=0.3)
    plt.title(label)
    plt.savefig(out, dpi=80, format="png")
    return tabs, tms
Exemplo n.º 7
0
            #fig.frameon = False
            #fig.add_axes([0,0,0,0],frameon=False)
            fig.set_size_inches(width, 5.5)
            self.outfile = title_in
            plt.savefig(self.outfile,
                        dpi=80,
                        format="png",
                        bbox_inches='tight',
                        pad_inches=0.003)
        else:
            plt.show()


if __name__ == '__main__':

    # Predict transmembrane segments for the uploaded FASTA file.
    ht = hmmtop.tools()
    ht.add_library('HT', fastafile.name)
    ht.scan_libraries()
    try:
        tms = ht.results['HT']['what'].values()
    except:
        # No usable prediction for the 'what' entry: plot without TMS bars.
        tms = []

    # Draw the plot; the third argument is passed through to graph()
    # unchanged.
    eh = what(seq, window, angle)
    eh.graph(tms, 'Hydropathy & Amphipathicity', True)

    # Emit a minimal HTML page that embeds the image and explains the
    # colours, one line per print.
    page = [
        '<HTML><head><title>Hydropathy & Amphipathicty plot</title></head><body>',
        "<img src ='%s'><br><br>" % eh.outfile,
        '<font color="Blue">Blue lines denote Hydropathy</font><br>',
        '<font color="Red">Red lines denote Amphipathicity</font><br>',
        '<font color="#C5860D">Orange bars mark transmembrane segments as predicted by HMMTOP</font>',
        '</body></HTML>',
    ]
    for html_line in page:
        print(html_line)