def parse_seq(self, web_page):
    """Turn one result page into a parsed ``GoSequence``.

    The sequence id is looked up with ``self._find_seq_id(web_page)``.
    When that lookup returns ``None`` nothing is built and ``None`` is
    returned.  Otherwise a ``GoSequence`` is created from the id and the
    page, ``extract_ID()`` and ``parse_go_term(self.e_threshold,
    self.debug)`` are called on it, and the object is returned.

    A warning is issued (the sequence is still returned) when the id is
    truthy, ``self.key_list`` is truthy, and the id is not in that list.
    """
    identifier = self._find_seq_id(web_page)
    if identifier is None:
        return None

    parsed = GoSequence(identifier, web_page)
    parsed.extract_ID()
    parsed.parse_go_term(self.e_threshold, self.debug)

    # Only complain about unknown ids when there is a key list to check.
    if identifier:
        if self.key_list and identifier not in self.key_list:
            message = "Seq_ID %s doesn't exist in the list %s" % (identifier, self.key_list)
            warnings.warn(message)
    return parsed
def test_parse_seq(self):
    """Parse a saved AmiGO BLAST result page and check its combined GO terms.

    Reads ``BLAST/AmiGOBLASTResults_Gene_Local.html`` from under
    ``self.data_dir``, feeds the text to a ``GoSequence`` named "gene5",
    runs ``extract_ID()`` and ``parse_go_term(self.e_threshold,
    self.debug)``, and asserts that ``combined_terms`` equals the expected
    set of GO ids.
    """
    infile = self.data_dir + "BLAST/AmiGOBLASTResults_Gene_Local.html"
    # Read the fixture in one call inside a context manager so the handle
    # is always closed, instead of leaking it and concatenating per line.
    with open(infile, "r") as webpage:
        data = webpage.read()

    seq = GoSequence("gene5", data)
    seq.extract_ID()
    seq.parse_go_term(self.e_threshold, self.debug)

    expected = set([
        'GO:0005125', 'GO:0016311', 'GO:0046360', 'GO:0003674',
        'GO:0030170', 'GO:0004795', 'GO:0005737', 'GO:0006566',
        'GO:0005615', 'GO:0005634', 'GO:0006520', 'GO:0005524',
        'GO:0008150', 'GO:0070905', 'GO:0008152', 'GO:0009071',
        'GO:0008652', 'GO:0006897', 'GO:0005829', 'GO:0005575',
        'GO:0009088', 'GO:0004765', 'GO:0016829',
    ])
    self.assertEqual(
        expected,
        seq.combined_terms,
        "Error!! \nExpected: %s\nActual: %s\n" % (sorted(expected), sorted(seq.combined_terms)),
    )
def run_single(self, debug=0):
    """BLAST every record one at a time (deprecated, slow) and write counts.

    Emits a ``DeprecationWarning``, then for each key in
    ``self.record_index`` (built from ``self.infile`` with
    ``SeqIO.index(..., "fasta")`` if it is still ``None``) creates a
    ``GoSequence``, runs ``blast_AmiGO()``, ``extract_ID()`` and
    ``parse_go_term(self.e_threshold)`` on it, stores it in
    ``self.results`` and appends a ``key \\t terms`` row to
    ``self.outfile + "_temp"``.  Finally ``self.counter`` is rebuilt with
    ``self.create_counter`` and handed to ``output_csv`` together with
    ``self.outfile`` and ``self.header``.

    Args:
        debug: Not used by this method; kept so existing callers keep
            working.
    """
    warnings.simplefilter('always')
    warnings.warn("Deprecated method: run_BLAST.run_single\nBLAST single sequence, slow!! ", DeprecationWarning)
    print("Running AmiGO:BLAST")

    # Resolve the index before touching the temp file so a failed index
    # build does not create or truncate it.
    if self.record_index is None:
        self.record_index = SeqIO.index(self.infile, "fasta")

    all_orfs = dict()
    # The context manager guarantees the temp file is flushed and closed,
    # including when a BLAST or parse step raises part-way through.
    with open(self.outfile + "_temp", "w") as temp_output:
        for key in self.record_index:
            print(key)
            this_seq = GoSequence(key, self.record_index[key].seq)
            this_seq.blast_AmiGO()
            this_seq.extract_ID()
            this_seq.parse_go_term(self.e_threshold)
            self.results[key] = this_seq
            all_orfs[key] = this_seq.combined_terms
            temp_output.write("%s \t %s\n" % (key, this_seq.combined_terms))

    self.counter = self.create_counter(all_orfs)
    output_csv(self.outfile, self.header, self.counter)
def test_GoConnector_long(self):
    """Check the combined GO terms for the ``lcl|AE014075.1_gene_3`` record.

    The record's sequence is passed to ``blast_AmiGO`` on a ``GoSequence``
    named "G3"; after ``extract_ID()`` and
    ``parse_go_term(self.e_threshold)`` the resulting ``combined_terms``
    must equal the expected set of GO ids.
    """
    query = self.record_index["lcl|AE014075.1_gene_3"].seq

    go_seq = GoSequence("G3", None)
    go_seq.blast_AmiGO(query)
    go_seq.extract_ID()
    go_seq.parse_go_term(self.e_threshold)

    expected_terms = set([
        'GO:0071470', 'GO:0016310', 'GO:0005886', 'GO:0009067',
        'GO:0000023', 'GO:0016597', 'GO:0043085', 'GO:0016491',
        'GO:0005737', 'GO:0050661', 'GO:0040007', 'GO:0005618',
        'GO:0009570', 'GO:0005634', 'GO:0006520', 'GO:0019877',
        'GO:0000166', 'GO:0016740', 'GO:0009097', 'GO:0009090',
        'GO:0019252', 'GO:0019761', 'GO:0016301', 'GO:0008152',
        'GO:0009088', 'GO:0055114', 'GO:0009507', 'GO:0008652',
        'GO:0005829', 'GO:0006555', 'GO:0004412', 'GO:0005575',
        'GO:0009089', 'GO:0005524', 'GO:0006164', 'GO:0006531',
        'GO:0009086', 'GO:0004072', 'GO:0009082',
    ])
    self.assertEqual(expected_terms, go_seq.combined_terms)
def test_GoConnector_short(self):
    """Check the combined GO terms for the ``lcl|AE014075.1_gene_2`` record.

    The record's sequence is passed to ``blast_AmiGO`` on a ``GoSequence``
    named "G2"; after ``extract_ID()`` and
    ``parse_go_term(self.e_threshold)`` the resulting ``combined_terms``
    must equal the two expected GO ids.
    """
    query = self.record_index["lcl|AE014075.1_gene_2"].seq

    go_seq = GoSequence("G2", None)
    go_seq.blast_AmiGO(query)
    go_seq.extract_ID()
    go_seq.parse_go_term(self.e_threshold)

    expected_terms = set([
        'GO:0004803',
        'GO:0006313',
    ])
    self.assertEqual(expected_terms, go_seq.combined_terms)
def amigo_batch_resume(self):
    """Resume an interrupted AmiGO batch run from its temp files.

    Loads the pickled web-session list from ``self.tempfile + "object"``
    and replays the text log in ``self.tempfile`` to rebuild:

    * ``self.stored_web_session_info`` -- one ``(index, session_id)`` tuple
      per logged session, placed at ``int(index)``; slots with no log line
      stay ``0``.
    * ``self.stored_session_id_result`` -- the session ids found on
      ``STORE_RESULT_STRING`` lines.
    * ``self.all_seqs`` -- one ``GoSequence`` appended per
      ``SEQ_ID_STRING`` line seen between a ``STORE_RESULT_STRING`` line
      and the next ``END_STORE_RESULT_STRING`` line.

    If the log holds no ``END_SESSION_ID_STRING`` line, the work is handed
    to ``self.amigo_batch_mode_resume_partial()`` and its result is
    returned.  Otherwise the completion flags are passed, with the open
    temp file, to ``self.retrieving_all_session_results``.

    Returns:
        The number of sessions without a stored result, counted before
        ``retrieving_all_session_results`` runs; or the return value of
        ``amigo_batch_mode_resume_partial()`` on the partial path.
    """
    print("RESUME!!! Tempfile exist: %s!" % self.tempfile)
    tempout = open(self.tempfile, "r+")
    try:
        session_object_file = self.tempfile + "object"
        # NOTE(security): pickle.load can execute arbitrary code from the
        # file.  Presumably this file is only ever written by this program;
        # confirm before pointing it at data from anywhere else.
        with open(session_object_file, 'r') as f:
            self.web_session_list = pickle.load(f)

        total_BLAST = len(self.web_session_list)
        self.stored_session_id_result = []
        self.stored_web_session_info = [0] * total_BLAST
        is_parse_result = False
        is_saving_completed = False

        # readlines() is kept on purpose: it consumes the file through the
        # normal read path, and the same handle is passed on below.
        for raw_line in tempout.readlines():
            line = raw_line.strip()

            # The checks are independent ``if``s in this exact order: the
            # marker constants are defined elsewhere, so one line may match
            # more than one of them.
            if line.startswith(STORE_SESSION_ID_STRING):
                fields = line.split(self.DELIM)
                self.stored_web_session_info[int(fields[1])] = (fields[1], fields[2])

            if line.startswith(END_SESSION_ID_STRING):
                is_saving_completed = True

            if line.startswith(END_STORE_RESULT_STRING):
                is_parse_result = False

            if is_parse_result and line.startswith(SEQ_ID_STRING):
                # Fields are split on self.DELIM rather than ":" because GO
                # ids already contain a colon.
                fields = line.split(self.DELIM)
                seqid = fields[1]
                seqSet = fields[2]
                seq = GoSequence(seqid, seqSet)
                # NOTE(security): eval() runs whatever expression the log
                # line holds.  Presumably this is the repr of a set of GO
                # terms written by this program -- confirm.  It is not
                # swapped for ast.literal_eval here because a Python 2
                # "set([...])" repr is a call, which literal_eval rejects.
                seq.combined_terms = eval(seqSet)
                self.all_seqs.append(seq)

            if line.startswith(STORE_RESULT_STRING):
                fields = line.split(self.DELIM)
                self.stored_session_id_result.append(fields[1])
                is_parse_result = True

        if self.debug:
            print("==DEBUG: Full saved session_list: %s" % (self.stored_web_session_info,))
            print("==DEBUG: Stored sessios_results: %s" % (self.stored_session_id_result,))

        if not is_saving_completed:
            print("===Warning!! Not all session_ids are stored, recreate using partial batch mode")
            # Release the handle before delegating; it is not passed on and
            # would otherwise stay open for the whole partial resume.
            tempout.close()
            return self.amigo_batch_mode_resume_partial()

        stored_session_id_only = self.rebuild_web_session_list_from_tempobject()
        complete_index_boolean = [x in self.stored_session_id_result for x in stored_session_id_only]
        missing_length = total_BLAST - sum(complete_index_boolean)
        print("Missing %d/%d session(s)!" % (missing_length, total_BLAST))

        if self.debug:
            missing_session_index = [i for i, is_comp in enumerate(complete_index_boolean) if not is_comp]
            print("==DEBUG: Missing %d/%d session(s): Index: %s" % (len(missing_session_index), total_BLAST, missing_session_index))
            print(stored_session_id_only)
            print(self.stored_session_id_result)
            print(complete_index_boolean)

        self.retrieving_all_session_results(complete_index_boolean, tempout)
    finally:
        # Closing twice is harmless, so this also covers the early return.
        tempout.close()

    print("End amigo_batch_resume, number of missed session: %d" % missing_length)
    return missing_length