def _prepare_domains(self, dom_list):
    """Select domain subsequences from the entire protein sequences.

    Calls ``mkdir`` on ``self._dom_fold``, loads the sequences from
    ``self.fasta`` and reads the scan results in ``self.pfam_scan``. For
    every scan row whose accession is listed in ``dom_list``, a FASTA
    record ``>id:start-stop`` followed by the matching slice of the
    sequence is appended to ``<self._dom_fold>/<accession>.fa``. One output
    file is created per accession, even when no row matches it.

    Scan rows are whitespace-separated. The fields used are: 0 (sequence
    id), 3 and 4 (integer start/stop) and 5 (accession; only the text
    before the first ``.`` is compared against ``dom_list``).

    Parameters
    ----------
    dom_list : iterable of str
        Accessions to extract, compared against field 5 of each row with
        everything from the first ``.`` removed.

    Raises
    ------
    IndexError, ValueError
        If a non-blank scan row has fewer than six fields or non-integer
        coordinates.
    """
    mkdir(self._dom_fold)
    fasta = fasta_utils.import_fasta(self.fasta)

    out_files = {}
    try:
        for acc in dom_list:
            # A repeated accession must not reopen its file: that would
            # orphan the first handle without ever closing it.
            if acc not in out_files:
                out_files[acc] = open(
                    "%s/%s.fa" % (self._dom_fold, acc), "w")

        with open(self.pfam_scan) as scan:
            # The first line is skipped unconditionally (presumably the
            # header emitted when the scan file was written).
            scan.readline()
            for line in scan:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines instead of failing on fields[0].
                    continue
                rbp = fields[0]
                start = int(fields[3])
                stop = int(fields[4])
                acc = fields[5].split('.')[0]
                if acc in out_files:
                    # NOTE(review): if start/stop are 1-based inclusive (as
                    # in standard pfam_scan output), this slice drops the
                    # first residue of the domain -- confirm against the
                    # coordinates produced by pfam_utils.sequence_search.
                    out_files[acc].write(
                        ">%s:%i-%i\n%s\n"
                        % (rbp, start, stop, fasta[rbp][start:stop]))
    finally:
        # Close every output handle even when parsing fails part-way.
        for handle in out_files.values():
            handle.close()
def _pfam_scan(self):
    """Scan the sequences against the Pfam database.

    Loads the sequences from ``self.fasta`` and writes to
    ``self.pfam_scan`` the text of ``pfam_utils.search_header()`` followed
    by the text returned by ``pfam_utils.sequence_search(name, seq)`` for
    every sequence, in sorted order of sequence name.
    """
    # Load the input before opening the output, so that a failure to read
    # the FASTA file does not truncate an existing scan file.
    fasta = fasta_utils.import_fasta(self.fasta)

    # The context manager closes the file even if a search call raises.
    with open(self.pfam_scan, "w") as nf:
        nf.write(pfam_utils.search_header())
        for rbp in sorted(fasta):
            nf.write(pfam_utils.sequence_search(rbp, fasta[rbp]))
def _pfam_scan(self):
    """Scan the sequences against the Pfam database.

    Loads the sequences from ``self.fasta``, which must contain exactly one
    sequence, and writes to ``self.pfam_scan`` the text of
    ``pfam_utils.search_header()`` followed by the text returned by
    ``pfam_utils.sequence_search(name, seq)`` for that sequence.

    Raises
    ------
    SystemExit
        If the FASTA file does not contain exactly one sequence.
    """
    fasta = fasta_utils.import_fasta(self.fasta)

    # Validate before touching the output, so that a rejected input does
    # not leave behind a truncated, header-only scan file.
    if len(fasta) != 1:
        sys.exit("""Fasta file must contain exactly one sequence.""")

    # The context manager closes the file even if the search call raises.
    with open(self.pfam_scan, "w") as nf:
        nf.write(pfam_utils.search_header())
        for rbp in sorted(fasta):
            nf.write(pfam_utils.sequence_search(rbp, fasta[rbp]))