def get_genes(self):
    """
    Collect the genes that belong to this Phamily.

    Queries the database for every gene joined to a pham whose name equals
    ``self.pham_no``, stores the number of rows returned in ``self.count``
    and keeps only the genes whose ``has_valid_start()`` is true.

    Returns:
        dict mapping ``gene.gene_id`` to the gene object built by
        ``new_PhamGene``.

    Raises:
        StarteratorError: if no gene is kept (nothing was returned for the
            pham, or every gene failed the start validation).
    """
    sql = (
        "SELECT `gene`.`GeneID`, `gene`.`phageID`, "
        " `Length`, `Start`, `Stop`, `Orientation`"
        " FROM `gene`"
        " JOIN `pham` ON `gene`.`GeneID` = `pham`.`GeneID`"
        " WHERE `pham`.`name` =%s; "
    )
    rows = get_db().query(sql, self.pham_no)
    self.count = len(rows)

    valid_genes = {}
    for row in rows:
        # Row layout follows the SELECT list; column 2 (`Length`) is unused.
        candidate = new_PhamGene(row[0], row[3], row[4], row[5], row[1])
        if not candidate.has_valid_start():
            continue
        valid_genes[candidate.gene_id] = candidate

    if not valid_genes:
        raise StarteratorError(
            "Pham Number %s not found or all genes fail validation!"
            % self.pham_no)
    return valid_genes
def _execute(self, cursor, query, params):
    """
    Execute ``query`` with ``params`` on ``cursor``.

    Returns:
        Whatever ``cursor.execute(query, params)`` returns.

    Raises:
        StarteratorError: if MySQLdb raises ``OperationalError``. Before
            raising, the host is printed and ``self.close()`` is called.
    """
    try:
        return cursor.execute(query, params)
    except MySQLdb.OperationalError:
        # Interpolate the host into the message; passing it as a second
        # print argument left the "%s" placeholder unformatted.
        print("Error connecting to MySQL on %s" % self.host)
        self.close()
        raise StarteratorError(
            "Error connecting to database! Please enter correct login credentials in Preferences menu."
        )
def make_gene(self, start, stop, orientation):
    """
    Build an unphamerated gene from coordinates and look up its pham.

    The gene is created from ``self.number``, the given coordinates,
    ``self.name`` and the sequence returned by ``self.get_sequence()``.
    Its ``blast()`` result is stored in ``self.pham_no`` and then passed,
    together with the gene, to ``self.get_pham``.

    Args:
        start: start coordinate handed to ``phamgene.UnPhamGene``.
        stop: stop coordinate handed to ``phamgene.UnPhamGene``.
        orientation: orientation handed to ``phamgene.UnPhamGene``.

    Raises:
        StarteratorError: if constructing the gene fails.
    """
    sequence = self.get_sequence()
    try:
        gene = phamgene.UnPhamGene(self.number, start, stop, orientation,
                                   self.name, sequence)
    except Exception:
        # `Exception` rather than a bare except so that KeyboardInterrupt
        # and SystemExit are not reported as bad coordinates.
        raise StarteratorError(
            "The gene could not be made! Check coordinates: Start: %s, Stop: %s, Orientation: %s"
            % (start, stop, orientation))
    self.pham_no = gene.blast()
    self.get_pham(self.pham_no, gene)
def get_color(self):
    """
    Get the color of the phamily from the database.

    Looks up the ``pham_color`` row whose ``name`` equals ``self.pham_no``.

    Returns:
        The second column (``color``) of the row returned by the database.

    Raises:
        StarteratorError: if the lookup fails or yields no usable row.
    """
    try:
        result = get_db().get(
            "SELECT `name`, `color`\n"
            " FROM `pham_color` WHERE `name` = %s;",
            self.pham_no)
        return result[1]
    except Exception:
        # `Exception` rather than a bare except so that KeyboardInterrupt
        # and SystemExit are not turned into a "not found" error.
        raise StarteratorError(
            "Pham number %s not found in database!" % self.pham_no)
def get_sequence(self):
    """
    Return the sequence stored in ``self.fasta``, reading it on first use.

    The first line of the file is skipped and the remaining lines are
    stripped of surrounding whitespace and concatenated. The result is
    cached in ``self.sequence`` and its length in ``self.seq_length``;
    later calls return the cached value without touching the file.

    Raises:
        StarteratorError: if the file cannot be opened or read (an empty
            file, which has no first line to skip, is reported the same
            way).
    """
    if not self.sequence:
        try:
            # Text mode: the lines are stripped anyway, and it keeps the
            # result a `str` on both Python 2 and Python 3.
            with open(self.fasta, "r") as fasta_file:
                next(fasta_file)  # skip the first (header) line
                sequence = "".join(line.strip() for line in fasta_file)
        except Exception:
            # `Exception` rather than a bare except so that
            # KeyboardInterrupt / SystemExit still propagate.
            raise StarteratorError(
                "The fasta file (%s) could not be opened!" % self.fasta)
        self.sequence = sequence
        self.seq_length = len(sequence)
    return self.sequence
def get_pham_no(phage_name, gene_number):
    """
    Gets the pham number of a gene, given the phage name and the gene number.

    Up to three searches are tried in order, stopping at the first one
    that returns rows:

    1. phage name prefix (or exact PhageID) and ``gene.Name`` matching the
       gene-number regex;
    2. same phage condition, but the regex is applied to ``gene.geneID``;
    3. ``gene.geneid`` starting with the phage name and matching the regex.

    Args:
        phage_name: phage name (used as a LIKE prefix) or PhageID.
        gene_number: gene number; it is inserted into the regex as-is, so
            regex metacharacters in it would change the pattern.

    Returns:
        The pham name of the first matching row, as a string.

    Raises:
        StarteratorError: if no search matches or a query fails.
    """
    print("%s %s" % (phage_name, gene_number))
    db = DB()

    name_prefix = phage_name + "%"
    # Built once so every search (and the debug output) uses the same pattern.
    gene_regex = '^([[:alnum:]]*_)*([[:alpha:]])*%s$' % str(gene_number)
    select_pham = (
        "SELECT pham.Name \n"
        " FROM gene JOIN pham ON gene.GeneID = pham.GeneID \n"
        " JOIN phage ON gene.PhageID = phage.PhageID \n"
        " WHERE "
    )
    searches = (
        ("(phage.Name LIKE %s or phage.PhageID = %s) AND gene.Name RLIKE %s",
         (name_prefix, phage_name, gene_regex)),
        ("(phage.Name LIKE %s or phage.PhageID = %s) AND gene.geneID RLIKE %s",
         (name_prefix, phage_name, gene_regex)),
        ("gene.geneid LIKE %s AND gene.geneID RLIKE %s",
         (name_prefix, gene_regex)),
    )

    try:
        results = ()
        for attempt, (where_clause, params) in enumerate(searches, 1):
            # Log the clause and parameters that are really sent to the
            # database instead of a hand-formatted approximation.
            print("DB query %d: %s %r" % (attempt, where_clause, params))
            results = db.query(select_pham + where_clause, params)
            if len(results) >= 1:
                break
            if attempt < len(searches):
                print("DB query %d failed, try search %d"
                      % (attempt, attempt + 1))
        print(results)
        # An empty result raises IndexError here and is reported below.
        pham_no = results[0][0]
        return str(pham_no)
    except Exception:
        # `Exception` rather than a bare except so that KeyboardInterrupt
        # and SystemExit are not reported as a missing gene.
        raise StarteratorError(
            "Gene %s of Phage %s not found in database!"
            % (gene_number, phage_name))
def get_pham_number(phage_name, gene_number):
    """
    Look up the pham number of a gene by phage name and gene number.

    Matches phages whose name starts with ``phage_name`` and genes whose
    name ends with ``gene_number``. Because the gene match is a suffix
    match, gene number 1 also matches names ending in 11, 21, ...; the
    first row returned by the database is used.

    Args:
        phage_name: phage name, used as a LIKE prefix.
        gene_number: gene number, used as a LIKE suffix on ``gene.Name``.

    Returns:
        The pham name of the first matching row, as a string.

    Raises:
        StarteratorError: if nothing matches or the query fails.
    """
    try:
        db = DB()
        # Join on pham.GeneID, the column the other pham queries in this
        # file use; the previous `pham.Gene` looks like a typo.
        # NOTE(review): confirm against the database schema.
        results = db.query(
            "SELECT pham.Name \n"
            " FROM gene JOIN pham ON gene.GeneID = pham.GeneID \n"
            " JOIN phage ON gene.PhageID = phage.PhageID \n"
            " WHERE phage.Name LIKE %s AND gene.Name LIKE %s \n"
            " ESCAPE '!'",
            (phage_name + "%", '%' + str(gene_number)))
        # An empty result raises IndexError here and is reported below.
        pham_no = results[0][0]
        return str(pham_no)
    except Exception:
        # `Exception` rather than a bare except so that KeyboardInterrupt
        # and SystemExit are not reported as a missing gene.
        raise StarteratorError(
            "Gene %s of Phage %s not found in database!"
            % (gene_number, phage_name))
def blast(self):
    """
    Run BLASTp for this gene unless a result file already exists, then
    return ``self.parse_blast()``.

    When ``<INTERMEDIATE_DIR>/<gene_id>.xml`` cannot be opened, the
    sequence from the first candidate start onward is translated, written
    to ``<INTERMEDIATE_DIR>/<gene_id>.fasta`` and searched with the
    ``blastp`` executable found under ``utils.BLAST_DIR``; the XML output
    (``-outfmt 5``) is written to the result file.

    Raises:
        StarteratorError: if the ``blastp`` process cannot be started or
            exits with a non-zero status.
    """
    # not sure where to put this... this makes more sense,
    # but I wanted to keep the Genes out of file making...
    print("Running BLASTp")
    xml_path = '%s/%s.xml' % (utils.INTERMEDIATE_DIR, self.gene_id)
    fasta_path = '%s/%s.fasta' % (utils.INTERMEDIATE_DIR, self.gene_id)

    # Probe for an existing, readable result file by opening it.
    # NOTE(review): an empty or truncated file left by an earlier failed
    # run also counts as "already done" here.
    try:
        open(xml_path).close()
        have_result = True
    except (IOError, OSError):
        have_result = False

    if not have_result:
        protein = SeqRecord(
            self.sequence[self.candidate_starts[0]:].seq.translate(),
            id=self.gene_id)
        print("%s %s" % (protein, self.sequence))

        # Short proteins get a more permissive (larger) e-value cutoff.
        # `//` keeps the integer division that `/` performs on ints in
        # Python 2.
        query_len = (self.stop - self.start) // 3
        if query_len < 50:
            e_value = math.pow(10, -5)
        else:
            e_value = math.pow(10, -20)

        SeqIO.write(protein, fasta_path, 'fasta')
        blast_args = [
            "%sblastp" % utils.BLAST_DIR,
            "-out", xml_path,
            "-outfmt", "5",
            "-query", fasta_path,
            # The embedded double quotes are deliberate: they are passed
            # through to BLAST as part of the database argument.
            "-db", "\"%s/Proteins.fasta\"" % (utils.PROTEIN_DB),
            "-evalue", str(e_value),
        ]
        print(" ".join(blast_args))
        try:
            subprocess.check_call(blast_args)
        except Exception:
            # `Exception` rather than a bare except so that
            # KeyboardInterrupt / SystemExit still propagate.
            raise StarteratorError("Blast could not run!")

    return self.parse_blast()
def get_phams(self):
    """
    Return the phams of this genome's genes, computing them on first use.

    Every gene is built as a ``phamgene.UnPhamGene`` and grouped under the
    value returned by its ``blast()``. Genes come from one of three
    sources:

    * ``self.profile`` is None: the genes of
      ``annotate.auto_annotate(self.fasta)``;
    * the profile's first word is "Profile": a CSV file whose first two
      rows after that line are skipped and whose rows with "ORF" in
      column 7 supply number (column 1), orientation (column 2), start
      (column 4) and stop (column 5);
    * the profile's first word is "CDS": lines starting with "CDS" (except
      "CDS join" lines), numbered in file order, with orientation "R" for
      "complement" entries and "F" otherwise.

    The result is stored in ``self._phams`` only once every gene has been
    processed, so a failure part-way through does not leave a partial
    result behind for later calls.

    Returns:
        dict mapping pham number to the list of genes assigned to it.

    Raises:
        StarteratorError: if the profile has an unknown format or any
            error occurs while it is being processed.
    """
    if not self._phams:
        sequence = self.get_sequence()
        phams = {}
        profile_error = (
            "The profile file (%s) could not be read correctly! Please make sure it is correct."
            % self.profile)

        def add_gene(gene_id, start, stop, orientation):
            # Build the gene, BLAST it, and file it under its pham number.
            gene = phamgene.UnPhamGene(gene_id, start, stop, orientation,
                                       self.name, sequence)
            phams.setdefault(gene.blast(), []).append(gene)

        if self.profile is None:
            predictions = annotate.auto_annotate(self.fasta)
            for predicted in predictions.genes:
                add_gene(predicted.id, predicted.start, predicted.stop,
                         predicted.orientation)
        else:
            try:
                with open(self.profile, "rbU") as profile:
                    print("%s has been opened!" % self.profile)
                    first_word = profile.readline().split()[0]
                    if first_word == "Profile":
                        csv_reader = csv.reader(profile)
                        # Two rows precede the feature rows; the first is
                        # echoed, the second is discarded.
                        print(next(csv_reader))
                        next(csv_reader)
                        for row in csv_reader:
                            print(row)
                            feature_type = row[7].strip()
                            print(feature_type)
                            if feature_type != "ORF":
                                continue
                            number = row[1].replace('"', "")
                            orientation = row[2]
                            start = int(row[4])
                            stop = int(row[5])
                            print("%s %s %s %s %s" % (
                                number, start, stop, orientation, self.name))
                            add_gene(number, start, stop, orientation)
                    elif first_word == "CDS":
                        # The first line is itself a feature line: rewind.
                        profile.seek(0)
                        gene_count = 0
                        for line in profile:
                            if line[0:3] != "CDS" or line[4:8] == 'join':
                                continue
                            gene_count += 1
                            items = line.replace("(", "").replace(")", "").split()
                            if items[1] == "complement":
                                add_gene(gene_count, int(items[2]),
                                         int(items[4]), "R")
                            else:
                                add_gene(gene_count, int(items[1]),
                                         int(items[3]), "F")
                    else:
                        raise StarteratorError(profile_error)
            except Exception:
                # `Exception` rather than a bare except so that
                # KeyboardInterrupt / SystemExit still propagate.
                # NOTE(review): errors raised while building or BLASTing
                # a gene are also reported as a profile problem, as before.
                raise StarteratorError(profile_error)
        self._phams = phams
    return self._phams