示例#1
0
 def get_genes(self):
     """
         Fetch the genes of this Phamily from the database.

         Stores the number of rows returned by the query in ``self.count``
         and returns a dict keyed by ``gene_id`` holding the gene objects
         whose ``has_valid_start()`` check passes. Raises StarteratorError
         when that dict ends up empty.
     """
     sql = ("SELECT `gene`.`GeneID`, `gene`.`phageID`, "
            " `Length`, `Start`, `Stop`, `Orientation`"
            " FROM `gene`"
            " JOIN `pham` ON `gene`.`GeneID` = `pham`.`GeneID`"
            " WHERE `pham`.`name` =%s; ")
     rows = get_db().query(sql, self.pham_no)
     self.count = len(rows)
     valid_genes = {}
     for row in rows:
         # Columns follow the SELECT order: GeneID, phageID, Length, Start,
         # Stop, Orientation. The Length column (index 2) is not used here.
         candidate = new_PhamGene(row[0], row[3], row[4], row[5], row[1])
         if candidate.has_valid_start():
             valid_genes[candidate.gene_id] = candidate
     if not valid_genes:
         raise StarteratorError(
             "Pham Number %s not found or all genes fail validation!" %
             self.pham_no)
     return valid_genes
示例#2
0
 def _execute(self, cursor, query, params):
     """
         Run ``query`` with ``params`` on ``cursor``.

         Returns whatever ``cursor.execute`` returns. On
         ``MySQLdb.OperationalError`` the host is reported on stdout,
         ``self.close()`` is called and a StarteratorError asking for
         correct login credentials is raised instead.
     """
     try:
         return cursor.execute(query, params)
     except MySQLdb.OperationalError:
         # Interpolate with % so the host actually replaces the %s
         # placeholder in the printed message.
         print("Error connecting to MySQL on %s" % self.host)
         self.close()
         raise StarteratorError(
             "Error connecting to database! Please enter correct login credentials in Preferences menu."
         )
示例#3
0
 def make_gene(self, start, stop, orientation):
     """
         Build an UnPhamGene from the given coordinates and BLAST it.

         The gene is constructed from ``self.number``, the coordinates,
         ``self.name`` and the sequence from ``self.get_sequence()``. The
         value returned by ``gene.blast()`` is stored in ``self.pham_no`` and
         handed, together with the gene, to ``self.get_pham``.

         Raises StarteratorError when the gene object cannot be constructed.
     """
     sequence = self.get_sequence()
     try:
         gene = phamgene.UnPhamGene(self.number, start, stop, orientation,
                                    self.name, sequence)
     except Exception:
         # Exception rather than a bare except, so KeyboardInterrupt and
         # SystemExit are not turned into a "bad coordinates" error.
         raise StarteratorError(
             "The gene could not be made! Check coordinates: Start: %s, Stop: %s, Orientation: %s"
             % (start, stop, orientation))
     self.pham_no = gene.blast()
     self.get_pham(self.pham_no, gene)
示例#4
0
 def get_color(self):
     """
         Get the color of the phamily from the database

         Returns the second column (`color`) of the row fetched from
         `pham_color` for ``self.pham_no``. Raises StarteratorError when
         the lookup fails or yields nothing indexable.
     """
     try:
         result = get_db().get(
             "SELECT `name`, `color`\n\
             FROM `pham_color` WHERE `name` = %s;", self.pham_no)
         return result[1]
     except StarteratorError:
         # A failure already reported as StarteratorError keeps its own
         # message instead of being relabelled as "not found".
         raise
     except Exception:
         # Exception rather than a bare except, so KeyboardInterrupt and
         # SystemExit still propagate.
         raise StarteratorError("Pham number %s not found in database!" %
                                self.pham_no)
示例#5
0
 def get_sequence(self):
     """
         Return the sequence read from ``self.fasta``, loading it once.

         When ``self.sequence`` is empty the file is opened, its first line
         is skipped and the remaining lines are stripped and concatenated.
         The result is stored in ``self.sequence`` and its length in
         ``self.seq_length``. Later calls return the stored value.

         Raises StarteratorError when the file cannot be read, including
         when it is empty.
     """
     if not self.sequence:
         try:
             # Text mode: the content is handled as str throughout.
             with open(self.fasta, "r") as fasta_file:
                 # Skip the first line (presumably the FASTA header); an
                 # empty file raises StopIteration, which is reported below.
                 next(fasta_file)
                 sequence = "".join(line.strip() for line in fasta_file)
         except Exception:
             # Exception rather than a bare except, so KeyboardInterrupt and
             # SystemExit still propagate.
             raise StarteratorError(
                 "The fasta file (%s) could not be opened!" % self.fasta)
         self.sequence = sequence
         self.seq_length = len(sequence)
     return self.sequence
示例#6
0
def get_pham_no(phage_name, gene_number):
    """
        Gets the pham number of a gene, given the phage name and the gene number

        Up to three searches are run, stopping at the first that returns
        rows:
          1. phage matched by name prefix or PhageID, gene.Name by regex;
          2. the same phage match, gene.geneID by regex;
          3. gene.geneid matched by the phage-name prefix and by regex.
        The regex accepts any run of alphanumeric "word_" prefixes, optional
        letters, then ``gene_number`` at the end of the value.

        Returns the first column of the first row as a string. Raises
        StarteratorError when no search finds a row or a query fails.
    """
    print("%s %s" % (phage_name, gene_number))
    db = DB()
    select_from = (
        "SELECT pham.Name \n"
        "FROM gene JOIN pham ON gene.GeneID = pham.GeneID \n"
        "JOIN phage ON gene.PhageID = phage.PhageID \n")
    phage_prefix = phage_name + "%"
    gene_pattern = '^([[:alnum:]]*_)*([[:alpha:]])*%s$' % str(gene_number)
    # Each search is (WHERE clause, parameters); values are always passed as
    # query parameters, never interpolated into the SQL text.
    searches = (
        ("WHERE (phage.Name LIKE %s or phage.PhageID = %s)"
         " AND gene.Name RLIKE %s",
         (phage_prefix, phage_name, gene_pattern)),
        ("WHERE (phage.Name LIKE %s or phage.PhageID = %s)"
         " AND gene.geneID RLIKE %s",
         (phage_prefix, phage_name, gene_pattern)),
        ("WHERE gene.geneid LIKE %s AND gene.geneID RLIKE %s",
         (phage_prefix, gene_pattern)),
    )
    try:
        results = []
        for attempt, (where_clause, params) in enumerate(searches, 1):
            if attempt > 1:
                print("DB query %d failed, try search %d" %
                      (attempt - 1, attempt))
            results = db.query(select_from + where_clause, params)
            if attempt == 1:
                print("DB query 1")
            if len(results) >= 1:
                break

        print(results)
        # An empty result raises IndexError here, reported below as
        # "not found".
        return str(results[0][0])
    except StarteratorError:
        # A failure already reported as StarteratorError keeps its own
        # message instead of being relabelled as "not found".
        raise
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        raise StarteratorError("Gene %s of Phage %s not found in database!" %
                               (gene_number, phage_name))
示例#7
0
def get_pham_number(phage_name, gene_number):
    """
        Look up the pham of a gene from a phage-name prefix and gene number.

        Matches phages whose name starts with ``phage_name`` and genes whose
        name ends with ``gene_number``. Returns the first column of the first
        matching row as a string. Raises StarteratorError when nothing
        matches or the query fails.
    """
    try:
        db = DB()
        # Join on pham.GeneID, the column the other pham queries in this
        # module join on.
        # NOTE(review): ESCAPE '!' is declared but the inputs are not
        # escaped, so % and _ inside them act as wildcards - confirm intent.
        results = db.query(
            "SELECT pham.Name \n\
            FROM gene JOIN pham ON gene.GeneID = pham.GeneID \n\
            JOIN phage ON gene.PhageID = phage.PhageID \n\
            WHERE phage.Name LIKE %s AND gene.Name LIKE %s \n\
            ESCAPE '!'", (phage_name + "%", '%' + str(gene_number)))
        row = results[0]
        pham_no = row[0]
        return str(pham_no)
    except StarteratorError:
        # A failure already reported as StarteratorError keeps its own
        # message instead of being relabelled as "not found".
        raise
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        raise StarteratorError("Gene %s of Phage %s not found in database!" %
                               (gene_number, phage_name))
示例#8
0
    def blast(self):
        """
        Run BLASTp for this gene unless a result file exists, then parse it.

        If ``<INTERMEDIATE_DIR>/<gene_id>.xml`` can be opened it is reused.
        Otherwise the sequence from the first candidate start onward is
        translated, written to ``<INTERMEDIATE_DIR>/<gene_id>.fasta`` and
        passed to the ``blastp`` executable under ``utils.BLAST_DIR`` with
        XML output (``-outfmt 5``).

        Returns whatever ``self.parse_blast()`` returns. Raises
        StarteratorError when the blastp process cannot be run or exits
        with an error.
        """
        print("Running BLASTp")
        result_path = '%s/%s.xml' % (utils.INTERMEDIATE_DIR, self.gene_id)
        try:
            # Only probing for a readable result file from an earlier run.
            open(result_path).close()
            have_result = True
        except (IOError, OSError):
            have_result = False

        if not have_result:
            protein = SeqRecord(
                self.sequence[self.candidate_starts[0]:].seq.translate(),
                id=self.gene_id)
            print("%s %s" % (protein, self.sequence))
            # Short queries (under 50 codons) get the more permissive
            # e-value cutoff.
            query_len = (self.stop - self.start) / 3
            if query_len < 50:
                e_value = math.pow(10, -5)
            else:
                e_value = math.pow(10, -20)

            fasta_path = '%s/%s.fasta' % (utils.INTERMEDIATE_DIR,
                                          self.gene_id)
            SeqIO.write(protein, fasta_path, 'fasta')
            blast_args = [
                "%sblastp" % utils.BLAST_DIR, "-out", result_path,
                "-outfmt", "5", "-query", fasta_path, "-db",
                # The embedded quotes are part of the argument itself.
                "\"%s/Proteins.fasta\"" % (utils.PROTEIN_DB), "-evalue",
                str(e_value)
            ]
            print(" ".join(blast_args))
            try:
                subprocess.check_call(blast_args)
            except Exception:
                # Exception rather than a bare except, so KeyboardInterrupt
                # and SystemExit still propagate.
                raise StarteratorError("Blast could not run!")
        return self.parse_blast()
示例#9
0
    def get_phams(self):
        """
        Return this phage's genes grouped by the value ``gene.blast()`` gives.

        The result is a dict mapping each ``blast()`` return value to a list
        of UnPhamGene objects. It is computed on the first call and stored
        in ``self._phams``; later calls return the stored dict while it is
        non-empty.

        Gene coordinates come from one of three sources:
          * ``self.profile`` is None: ``annotate.auto_annotate(self.fasta)``;
          * a profile whose first word is "Profile": read as CSV, skipping
            the next two rows, keeping rows whose column 7 is "ORF" (number
            from column 1, orientation column 2, start/stop columns 4/5);
          * a profile whose first word is "CDS": every line starting with
            "CDS" except those with "join" at characters 4-8, numbered in
            order of appearance.

        Raises StarteratorError when the profile cannot be read or has an
        unrecognised first word. If any step fails, ``self._phams`` is left
        empty so that a later call starts over instead of returning a
        partial result.
        """
        if not self._phams:
            self._phams = {}
            sequence = self.get_sequence()
            # Collected locally and stored only after everything succeeded.
            phams = {}

            def add_gene(number, start, stop, orientation):
                # Build the gene and file it under the key blast() returns.
                gene = phamgene.UnPhamGene(number, start, stop, orientation,
                                           self.name, sequence)
                phams.setdefault(gene.blast(), []).append(gene)

            if self.profile is None:
                gene_predictions = annotate.auto_annotate(self.fasta)
                for predicted in gene_predictions.genes:
                    add_gene(predicted.id, predicted.start, predicted.stop,
                             predicted.orientation)
            else:
                try:
                    with open(self.profile, "rbU") as profile:
                        print("%s has been opened!" % self.profile)
                        first_line = profile.readline()
                        first_word = first_line.split()[0]
                        if first_word == "Profile":
                            csv_reader = csv.reader(profile)
                            line = next(csv_reader)
                            print(line)
                            # One more row is skipped before the data rows.
                            next(csv_reader)
                            for row in csv_reader:
                                print(row)
                                feature_type = row[7].strip()
                                print(feature_type)
                                if feature_type != "ORF":
                                    continue
                                number = row[1].replace('"', "")
                                orientation = row[2]
                                start = int(row[4])
                                stop = int(row[5])
                                print("%s %s %s %s %s" %
                                      (number, start, stop, orientation,
                                       self.name))
                                add_gene(number, start, stop, orientation)
                        elif first_word == "CDS":
                            # The first line is itself a record; start over.
                            profile.seek(0)
                            gene_count = 0
                            for line in profile:
                                if line[0:3] != "CDS" or line[4:8] == 'join':
                                    continue
                                gene_count += 1
                                line_items = line.replace("(", "").replace(
                                    ")", "").split()
                                if line_items[1] == "complement":
                                    gene_orientation = "R"
                                    gene_start = int(line_items[2])
                                    gene_end = int(line_items[4])
                                else:
                                    gene_orientation = "F"
                                    gene_start = int(line_items[1])
                                    gene_end = int(line_items[3])
                                add_gene(gene_count, gene_start, gene_end,
                                         gene_orientation)
                        else:
                            raise StarteratorError(
                                "The profile file (%s) could not be read correctly! Please make sure it is correct."
                                % self.profile)
                except StarteratorError:
                    # Keep the specific message of an error already raised
                    # as StarteratorError inside the block above.
                    raise
                except Exception:
                    # Exception rather than a bare except, so
                    # KeyboardInterrupt and SystemExit still propagate.
                    raise StarteratorError(
                        "The profile file (%s) could not be read correctly! Please make sure it is correct."
                        % self.profile)
            self._phams = phams
        return self._phams