def updateDB(self, user, password, server, db): home = os.environ['HOME'] fname = '%s/.phamerator/%s.sql' % (home, db) try: # if this is an autoupdate (not a forced manual one) get a new .sql file from the server # otherwise we'll just use the local copy that we downloaded at the last autoupdate if self.force: print 'updating from local .sql file' else: self.shared.text = 'Downloading updates...' f = urllib2.urlopen('http://%s/%s.sql' % (server, db), timeout=100) print 'using urllib2 to grab %s.sql' % db local_file = open(fname, 'w') local_file.write(f.read()) local_file.close() except: print 'No remote database is available. Sticking with the local version.' return self.shared.text = 'Applying updates...' have_root_credentials = False try: rootdbc = db_conf.db_conf(username='******',password='******', server='localhost', db='mysql') r = rootdbc.get_cursor() have_root_credentials = True except: pass if not have_root_credentials: try: rootdbc = db_conf.db_conf(username='******',password='', server='localhost', db='mysql') r = rootdbc.get_cursor() have_root_credentials = True except: pass while not have_root_credentials: #try: dbSetupWarningDlg = databaseSetupWarningDialog(self.db) dbSetupWarningDlg.run() print '...' try: rootdbc = db_conf.db_conf(username='******',password=dbSetupWarningDlg.pwd, server='localhost', db='mysql') r = rootdbc.get_cursor() except: continue have_root_credentials = True #except: # pass r.execute("DROP DATABASE IF EXISTS %s_temp" % self.db) r.execute("CREATE DATABASE %s_temp" % self.db) r.execute("GRANT ALL ON %s_temp.* TO anonymous@localhost IDENTIFIED BY 'anonymous'" % self.db) r.execute("FLUSH PRIVILEGES") os.system("mysql -u %s -p'%s' %s_temp < %s" % (user, password, db, fname)) # install to a new temp database
def get_server(self, platform, hostname):
    """Register the client node if needed and pick a server for it.

    Looks up *hostname* in the ``node`` table and inserts a row (with
    *platform*) when no id is found.  It then returns the name of the entry
    in ``self.servers`` whose ``get_last_accessed()`` value is smallest
    (ties broken by name).

    Args:
        platform: platform string stored with a newly registered node.
        hostname: client host name used as the lookup key.

    Returns:
        The ``name`` of the least recently accessed server.

    Raises:
        IndexError: if ``self.servers`` is empty.
    """
    c = db_conf.db_conf().get_cursor()
    # Values come from the client, so let the driver quote them.
    c.execute("SELECT id FROM node WHERE hostname = %s", (hostname,))
    node_id = c.fetchone()
    if node_id:
        node_id = int(node_id[0])
    # if this is the first ever connection for this client, add it to the node table
    # (the LOCK/UNLOCK TABLES statements around this were already disabled)
    if not node_id:
        c.execute("INSERT INTO node (platform, hostname) VALUES (%s, %s)",
                  (platform, hostname))
        c.execute("SELECT id FROM node WHERE platform = %s AND hostname = %s",
                  (platform, hostname))
        node_id = c.fetchone()[0]
        self._logger.log('registering new node id:' + str(node_id) + ' platform: ' + platform + ' hostname: ' + hostname)
    # return the server that was accessed the least recently (should be the least busy one)
    last_accessed = {}
    for server in self.servers:
        last_accessed[server.name] = server.get_last_accessed()
    by_age = sorted((stamp, name) for (name, stamp) in last_accessed.items())
    chosen = by_age[0][1]
    self._logger.log(hostname + ': use' + chosen)
    return chosen
def run (self):
    """Periodically recycle alignments that were handed out but never finished.

    While the module-level ``keep_alive`` flag is true, each pass logs the
    number of 'stale' rows in ``scores`` and marks them 'avail', logs the
    number of 'pending' rows and marks them 'stale', then commits.  Between
    passes it sleeps in 5 second steps (48 of them) so that clearing
    ``keep_alive`` is noticed quickly.
    """
    global keep_alive
    cursor = db_conf.db_conf().get_cursor()
    self._logger = logger()

    def attempt(statement):
        # SQL failures are handed to show_sql_errors instead of ending the thread
        try:
            cursor.execute(statement)
        except:
            self.show_sql_errors(cursor)

    while keep_alive:
        #print 'looking for stale alignments...'
        attempt("SELECT COUNT(*) FROM scores WHERE status = 'stale' FOR UPDATE")
        stale = cursor.fetchone()
        if stale:
            self._logger.log('Adding ' + str(int(stale[0])) + ' stale alignments back to the queue.')
        attempt("UPDATE scores SET status = 'avail' WHERE status = 'stale'")

        #print 'looking for pending alignments...'
        attempt("SELECT COUNT(*) FROM scores WHERE status = 'pending' FOR UPDATE")
        pending = cursor.fetchone()
        if pending:
            self._logger.log('Marking ' + str(int(pending[0])) + ' pending alignments as stale.')
        attempt("UPDATE scores SET status = 'stale' WHERE status = 'pending'")
        attempt("COMMIT")

        naps = 0
        while naps < 48 and keep_alive:
            time.sleep(5)
            naps += 1
def main(): opts = options(sys.argv[1:]).argDict username, password, database, server = opts['user'], opts['password'], opts['database'], opts['server'] table = opts['alignment_type'] db = opts['database'] poll = int(opts['refresh']) c = db_conf.db_conf(username=username, password=password, server=server, db=database).get_cursor() c.execute("SELECT COUNT(*) FROM %s.gene" % db) total = int(c.fetchone()[0]) #print "total:", total pbar = os.popen("zenity --progress --auto-close --title=\"%s progress\" --text=\"\"" % table, "w", 0) timer = poll while 1: if timer == poll: c.execute("SELECT COUNT(*) FROM %s.gene WHERE %s_status = 'done'" % (db, table)) count = int(c.fetchone()[0]) c.execute("COMMIT") p = float(count)/total*100 percent = "%.02f" % p pbar.write(str(percent)+'\n') timer = 0 refresh = str(abs((int(timer)-int(poll)))) if count != total: pbar.write('#'+str(count)+'/'+str(total)+' genes processed. '+table+' is ' +str(percent)+'% completed. Refreshing in '+refresh+' seconds...'+'\n') timer = timer + 1 time.sleep(1) else: print '%s processing is complete.' % table sys.exit()
def __init__(self): '''parse command line arguments, setup a python dictionary to hold phams, and create a nameConverter object''' db = db_conf.db_conf() self.c = db.get_cursor() #self.alignmentType = sys.argv[1] self.alignmentType = 'both' print 'alignment type:', self.alignmentType if self.alignmentType == 'clustalw': #self.clustalwThreshold = float(sys.argv[2]) self.clustalwThreshold = 0.275 print 'clustalw threshold:', self.clustalwThreshold elif self.alignmentType == 'blast': #self.blastThreshold = float(sys.argv[2]) self.blastThreshold = 0.0001 print 'blast threshold:', self.blastThreshold elif self.alignmentType == 'both': #self.blastThreshold, self.clustalwThreshold = float(sys.argv[2]), float(sys.argv[3]) self.blastThreshold, self.clustalwThreshold = 0.0001, 0.275 print 'clustalw threshold:', self.clustalwThreshold print 'blast threshold:', self.blastThreshold else: print 'usage: phamBuilder.py {blast|clustalw|both} [blast_threshold_score] [clustalw_threshold_score]' sys.exit() # dict whose keys are pham names. Each value is a list of genes that are a member of that particular pham self.phams = {} self.nc = nameConverter()
def get_genes_from_PhageID(self, PhageID):
    """Return ``phamerator_manage_db.get_genes_from_PhageID`` for *PhageID*.

    A new cursor is opened from the stored connection settings on every
    call and kept on ``self.c``; ``self.db`` is rebuilt around it.
    """
    cursor = db_conf.db_conf(
        username=self.username,
        password=self.password,
        server=self.server,
        db=self.database,
    ).get_cursor()
    self.c = cursor
    self.db = pham.db(c=cursor)
    return phamerator_manage_db.get_genes_from_PhageID(cursor, PhageID)
def get_members_of_pham(self, phamName):
    """Return ``phamerator_manage_db.get_members_of_pham`` for *phamName*.

    Opens a new cursor from the stored connection settings, stores it on
    ``self.c`` and rebuilds ``self.db`` before delegating.
    """
    connection = db_conf.db_conf(username=self.username,
                                 password=self.password,
                                 server=self.server,
                                 db=self.database)
    self.c = connection.get_cursor()
    self.db = pham.db(c=self.c)
    members = phamerator_manage_db.get_members_of_pham(self.c, phamName)
    return members
def get_unique_phams(self):
    """Return the result of ``phamerator_manage_db.get_unique_phams``.

    Opens a new cursor from the stored connection settings, stores it on
    ``self.c`` and rebuilds ``self.db`` before delegating.
    """
    settings = dict(username=self.username, password=self.password,
                    server=self.server, db=self.database)
    self.c = db_conf.db_conf(**settings).get_cursor()
    self.db = pham.db(c=self.c)
    return phamerator_manage_db.get_unique_phams(self.c)
def get_translation_from_GeneID(self, GeneID):
    """Return ``phamerator_manage_db.get_translation_from_GeneID`` for *GeneID*.

    Opens a new cursor from the stored connection settings, stores it on
    ``self.c`` and rebuilds ``self.db`` before delegating.
    """
    cursor = db_conf.db_conf(
        username=self.username,
        password=self.password,
        server=self.server,
        db=self.database,
    ).get_cursor()
    self.c = cursor
    self.db = pham.db(c=cursor)
    translation = phamerator_manage_db.get_translation_from_GeneID(cursor, GeneID)
    return translation
def request_seqs(self, server, numSeqs, hostname):
    """Reserve up to *numSeqs* available alignments and return their sequences.

    Under a write lock on ``scores``, selects rows with status 'avail' and
    marks each of them 'pending'.  After unlocking and committing, the
    translations of each row's query and subject genes are looked up.

    Args:
        server: name of the server handling the request (used for logging).
        numSeqs: maximum number of alignments to hand out.
        hostname: requesting client's host name (used for logging).

    Returns:
        A list of ``(score_id, query_translation, subject_translation)``
        tuples, one per reserved row.
    """
    self.lastAccessed = time.time()
    self._logger.log('receiving request for ' + str(numSeqs) + ' sequences to align from ' + hostname + ' using ' + server)
    c = db_conf.db_conf().get_cursor()
    try:
        c.execute("""LOCK TABLES scores WRITE""")
    except Exception:
        self.show_sql_errors(c)

    seqsToAlign = []
    start = time.time()
    try:
        try:
            # parameters are passed as a sequence, as the DB-API expects
            c.execute("SELECT id, query, subject FROM scores WHERE status = 'avail' LIMIT %s", (numSeqs,))
        except Exception:
            self.show_sql_errors(c)
        result = c.fetchall()
        for score_id, query, subject in result:
            #print 'marking', score_id, 'as pending'
            c.execute("""UPDATE scores SET status = 'pending' WHERE id = %s""", (score_id,))
    finally:
        # always release the lock, even if marking a row failed
        try:
            c.execute("""UNLOCK TABLES""")
        except Exception:
            self.show_sql_errors(c)
    try:
        c.execute("COMMIT")
    except Exception:
        self.show_sql_errors(c)

    for score_id, query, subject in result:
        c.execute("SELECT translation FROM gene WHERE GeneID = %s", (query,))
        querySeq = c.fetchone()[0]
        c.execute("SELECT translation FROM gene WHERE GeneID = %s", (subject,))
        subjectSeq = c.fetchone()[0]
        # appended once; the original queued the same tuple twice
        seqsToAlign.append((score_id, querySeq, subjectSeq))
    try:
        c.execute("COMMIT")
    except Exception:
        self.show_sql_errors(c)
    self._logger.log(hostname + ' --> getting seqs: ' + str(time.time() - start))
    return seqsToAlign
def consolidate_phams(self):
    """join together phams that share a common member

    For every GeneID in the ``gene`` table, the first pham in ``self.phams``
    that contains it is kept; every other pham containing the same gene has
    its members added to that first pham and is then deleted.

    Returns:
        True if at least one pham was merged away during this pass,
        otherwise False.  Callers repeat the pass until it returns False.
    """
    changed = False
    try:
        self.c.execute("""SELECT GeneID FROM gene""")
    except Exception:
        self.show_sql_errors(self.c)
    gene_rows = self.c.fetchall()
    # for every gene in the 'gene' table
    for row in gene_rows:
        GeneID = row[0]
        firstPham = None
        # iterate over a snapshot of the keys: phams are deleted inside the loop
        for pham in list(self.phams.keys()):
            if GeneID not in self.phams[pham]:
                continue
            # 'is None' rather than truthiness: 0 is a valid pham name
            if firstPham is None:
                # remember that this gene first appeared in this pham
                firstPham = pham
                continue
            # the gene is in a second pham: fold it into the first one
            target = self.phams[firstPham]
            for gene in self.phams[pham]:
                if gene not in target:
                    target.append(gene)
            # finally, delete this pham
            del self.phams[pham]
            changed = True
    return changed
def main(): pB = phamBuilder() #pB.reset_pham_table() pB.create_temp_pham_table() pB.construct_phams() c = db_conf.db_conf().get_cursor() nc = nameConverter() while 1: print 'consolidating phams...' changed = pB.consolidate_phams() if not changed: break print 'done' pB.renumber_phams() pB.look_for_parents() pB.save_phams() print "number of phams:", len(pB.phams) print 'mean pham size:', pB.get_mean() for pham in pB.phams: print str(pham) + ':', len(pB.phams[pham]) output = '' for gene in pB.phams[pham]: n =nc.GeneID_to_name(gene) if output: output = output + ',' + n else: output = n output = output + '\n' print output for i in range(1, 2000): n = pB.get_number_of_phams_with_length(i) if n: print i,':', n
def get_scores(self, query, subject):
    """Return ``phamerator_manage_db.get_scores`` for *query* and *subject*.

    Opens a new cursor from the stored connection settings, stores it on
    ``self.c`` and rebuilds ``self.db`` before delegating.
    """
    connection = db_conf.db_conf(username=self.username,
                                 password=self.password,
                                 server=self.server,
                                 db=self.database)
    self.c = connection.get_cursor()
    self.db = pham.db(c=self.c)
    scores = phamerator_manage_db.get_scores(self.c, query, subject)
    return scores
def create_genome_map(self,phages): self.c = db_conf.db_conf(username=self.username,password=self.password,server=self.server,db=self.database).get_cursor() self.db = pham.db(c = self.c) lengths = [] for phage in phages: length = phamerator_manage_db.get_length_of_genome(self.c,phage["PhageID"]) lengths.append(length) length = max(lengths) self.canvas = goocanvas.Canvas() canvasInterface = cartographer.CanvasInterface(self.c, self.canvas) self.canvas.set_root_item_model(canvasInterface.create_canvas_model(phages, length)) zoomFactor = 20.0 self.canvas.set_bounds(0,0,length/zoomFactor, 100000) needs_new = True while needs_new == True: name = "/tmp/phageSOAP/tmp" + str(hash(str(random.randint(1,64563))+str(hash(self.canvas)))) if os.path.exists(name): needs_new = True else: needs_new = False self.current_genome_map = name surface = cairo.SVGSurface (self.current_genome_map, (length/zoomFactor)+72, 5*72*len(phages)) cr = cairo.Context (surface) cr.translate (36, 130) self.canvas.render (cr, None, 0.1) cr.show_page () print "\n>genome map generated\n" return "genome map generated\n"
def get_gene_start_stop_length_orientation_from_GeneID(self, GeneID):
    """Return the manage_db result of the same name for *GeneID*.

    Opens a new cursor from the stored connection settings, stores it on
    ``self.c`` and rebuilds ``self.db`` before delegating to
    ``phamerator_manage_db.get_gene_start_stop_length_orientation_from_GeneID``.
    """
    settings = dict(username=self.username, password=self.password,
                    server=self.server, db=self.database)
    self.c = db_conf.db_conf(**settings).get_cursor()
    self.db = pham.db(c=self.c)
    lookup = phamerator_manage_db.get_gene_start_stop_length_orientation_from_GeneID
    return lookup(self.c, GeneID)
def get_all_scores(self, alignmentType='both'):
    """Return ``phamerator_manage_db.get_all_scores`` for *alignmentType*.

    Opens a new cursor from the stored connection settings, stores it on
    ``self.c`` and rebuilds ``self.db`` before delegating.
    """
    cursor = db_conf.db_conf(
        username=self.username,
        password=self.password,
        server=self.server,
        db=self.database,
    ).get_cursor()
    self.c = cursor
    self.db = pham.db(c=cursor)
    return phamerator_manage_db.get_all_scores(cursor, alignmentType=alignmentType)
def __init__(self, alignment_type): blast_threshold = float(sys.argv[1]) clustal_threshold = float(sys.argv[2]) db = db_conf.db_conf() c = db.get_cursor() # dict whose keys are pham names. Each value is a list of genes that are a member of that particular pham self.phams = {} try: c.execute("""SELECT GeneID FROM gene""") except: self.show_sql_errors(c) GeneIDs = c.fetchall() # for every gene in the 'gene' table for GeneID in GeneIDs: #time.sleep(1) GeneID = GeneID[0] added = False # get all the alignments for that gene with a good score if alignment_type == 'clustalw': c.execute("""SELECT query, subject, score FROM query_subject, clustalw WHERE ((query = '%s' OR subject = '%s') AND query_subject.id = clustalw.id AND score >= %s)""" % (GeneID, GeneID, clustal_threshold)) elif alignment_type == 'blast': c.execute("""SELECT query, subject, score FROM query_subject, blast WHERE ((query = '%s' OR subject = '%s') AND blast.score <= %s AND blast.id = query_subject.id) UNION SELECT query, subject, score FROM subject_query, blast WHERE ((query = '%s' OR subject = '%s') AND blast.score <= %s AND blast.id = subject_query.id)""" % (GeneID, GeneID, blast_threshold, GeneID, GeneID, blast_threshold)) else: print 'usage: phamBuilder.py {blast|clustalw} threshold_score' alignments = c.fetchall() #print len(alignments), 'genes have a good alignment score with', GeneID # for each good alignment for alignment in alignments: query, subject, score = alignment #print 'query:', query, 'subject:', subject #time.sleep(1) # look at every pham #print 'phams:', self.phams for pham in self.phams.keys(): #time.sleep(1) #print 'query:', query, 'subject:', subject, 'GeneID:', GeneID if GeneID not in self.phams[pham]: # if the 'query' gene is in this pham and is not the current gene, then add the current GeneID (subject) if GeneID != query and query in self.phams[pham]: self.phams[pham].append(GeneID) added = True # if pham == 12: print 'added', GeneID, 'to pham 12 because query', query, 'in this 
pham. (score = ' + str(score) + ')' #print 'added', GeneID, 'to pham:', pham # if the 'subject' gene is in this pham and is not the current gene, then add the current GeneID (query) elif GeneID != subject and subject in self.phams[pham]: self.phams[pham].append(GeneID) added = True # if pham == 12: print 'added', GeneID, 'to pham 12 because subject', subject, 'in this pham. (score = ' + str(score) + ')' #print 'added', GeneID, 'to pham:', pham if added == False: self.phams.keys().sort() if len(self.phams) > 0: key = self.phams.keys()[-1:][0] + 1 else: key = 0 if self.phams.has_key(key): print 'duplicate key error' sys.exit() #if key == 12: print 'adding', GeneID, 'as the founding member of pham 12' self.phams[key] = [GeneID]
def create_pham_circle(self,phamName,alignmentColor,adjustment,radius): self.c = db_conf.db_conf(username=self.username,password=self.password,server=self.server,db=self.database).get_cursor() self.db = pham.db(c = self.c) filename = str(phamName) + str(alignmentColor) + str(adjustment) + str(radius) filelist = os.listdir(self.cachedir) for item in filelist: if item == filename: self.current_phamCircle = self.cachedir + filename string = "phamCircle " + str(phamName) + " found in cache, using cached version\n" print ">" + string return string self.radius = radius self.alignmentColor = alignmentColor self.threshold = 0.70 self.phamCircleCanvas = goocanvas.Canvas() GeneIDs = phamerator_manage_db.get_members_of_pham(self.c, phamName) memberPhages, self.nonMemberPhages = [], [] for GeneID in GeneIDs: PhageID = phamerator_manage_db.get_PhageID_from_GeneID(self.c, GeneID) if PhageID not in memberPhages: memberPhages.append(PhageID) totalPhages = phamerator_manage_db.get_PhageIDs(self.c) for p in totalPhages: if p not in memberPhages: self.nonMemberPhages.append(phamerator_manage_db.get_phage_name_from_PhageID(self.c, p)) self.l = [] self.genes = [] for a in GeneIDs: for b in GeneIDs: if a != b: for gene in [a, b]: if gene not in self.genes: self.genes.append(gene) clustalwScore, blastScore = phamerator_manage_db.get_scores(self.c, a, b) if clustalwScore >= 0.275: self.l.append((a, b, 'clustalw',clustalwScore)) if blastScore and blastScore <= 0.0001: self.l.append((a, b, 'blast',blastScore)) self.phamCircle = PhamDisplay.PhamCircle(phamName, self.c,verbose=True,radius=self.radius) if self.alignmentColor == True: self.phamCircleCanvas.set_root_item_model(self.phamCircle.create_canvas_model(self.nonMemberPhages, self.genes, self.l,adjustment,self.threshold,blastColor='#ff0000', clustalwColor='#0000ff')) else: phamColorFromDataBase = self.db.select('pham_color','color',name = phamName)[0][0] 
self.phamCircleCanvas.set_root_item_model(self.phamCircle.create_canvas_model(self.nonMemberPhages, self.genes, self.l,adjustment,self.threshold,allColor = phamColorFromDataBase)) """x, y = (600, 500) self.phamCircleCanvas.set_size_request(x, y) self.defaultPhamCircleCanvasSize = (x, y) self.phamCircleCanvas.show() self.window.window.set_cursor(None) return False""" self.current_phamCircle = self.cachedir + filename self.phamCircleCanvas.set_bounds(0,0, 10000, 10000) surface = cairo.SVGSurface (self.current_phamCircle, 15*72, 15*72) cr = cairo.Context (surface) cr.translate (10, 0) self.phamCircleCanvas.render (cr, None, 0.1) cr.show_page() string = "phamCircle " + str(phamName) + " generated\n" print ">" + string return string
def get_GeneIDs(self, type=None, PhageID=None):
    """Return ``phamerator_manage_db.get_GeneIDs`` for *type* and *PhageID*.

    Opens a new cursor from the stored connection settings, stores it on
    ``self.c`` and rebuilds ``self.db`` before delegating.
    """
    connection = db_conf.db_conf(username=self.username,
                                 password=self.password,
                                 server=self.server,
                                 db=self.database)
    self.c = connection.get_cursor()
    self.db = pham.db(c=self.c)
    gene_ids = phamerator_manage_db.get_GeneIDs(self.c, type=type, PhageID=PhageID)
    return gene_ids
def __init__(self, logging, c, server, database, opts):
    """Initialise the base classes, open a cursor and subscribe to "clustalw".

    Args:
        logging: passed to ``phamServlet.__init__`` and ``logger.logger``.
        c: cursor passed to ``phamServlet.__init__``; ``self.c`` is then
            replaced by a new cursor built from *opts*.
        server: stored on ``self.server``.
        database: stored on ``self.database``.
        opts: mapping providing 'user', 'password', 'server' and 'database'.
    """
    Thread.__init__(self)
    phamServlet.__init__(self,logging, c)
    Subscriber.__init__(self)
    #self.setDaemon(1)
    self.server, self.database = server, database
    connection = db_conf.db_conf(
        username=opts['user'],
        password=opts['password'],
        server=opts['server'],
        db=opts['database'],
    )
    self.c = connection.get_cursor()
    self.subscribe("clustalw")
    self._logger = logger.logger(logging)
    self.publisher = phamPublisher()
def apply(self): have_root_credentials = False try: rootdbc = db_conf.db_conf(username='******',password='******', server='localhost', db='mysql') r = rootdbc.get_cursor() have_root_credentials = True except: pass if not have_root_credentials: try: rootdbc = db_conf.db_conf(username='******',password='', server='localhost', db='mysql') r = rootdbc.get_cursor() have_root_credentials = True except: pass while not have_root_credentials: #try: print 'try to open database setup dialog' dbSetupWarningDlg = databaseSetupWarningDialog(self.database) dbSetupWarningDlg.run() try: rootdbc = db_conf.db_conf(username='******',password=dbSetupWarningDlg.pwd, server='localhost', db='mysql') r = rootdbc.get_cursor() except: continue have_root_credentials = True #except: # pass r.execute("SELECT DISTINCT TABLE_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA='%s_temp'" % self.database) tables = [row[0] for row in r.fetchall()] #r.execute("SET foreign_key_checks = 0") r.execute("DROP DATABASE IF EXISTS %s" % self.database) r.execute("CREATE DATABASE %s" % self.database) print 'renaming tables...' for table in tables: print "RENAME TABLE %s_temp.%s TO %s.%s" % (self.database, table, self.database, table) r.execute("RENAME TABLE %s_temp.%s TO %s.%s" % (self.database, table, self.database, table)) print '...done renaming tables' r.execute("GRANT ALL ON %s.* TO anonymous@localhost IDENTIFIED BY 'anonymous'" % self.database) #r.execute("SET foreign_key_checks = 1") r.execute("FLUSH PRIVILEGES") return True
def __init__(self):
    """Initialise the base classes, subscribe to "fasta" and open a cursor.

    Also creates a ``BLASTdb``, records the current time as
    ``lastAccessed``, picks a random ``waitTime`` between 5 and 15 and marks
    the servlet as not busy.
    """
    for base in (phamServlet, Subscriber, Thread):
        base.__init__(self)
    self.subscribe("fasta")
    self.current_db = BLASTdb()
    self.lastAccessed = time.time()
    self.waitTime = random.randint(5,15)
    self.busy = False
    self._logger = logger()
    connection = db_conf.db_conf()
    self.c = connection.get_cursor()
def __init__(self, logging, c, server, database, opts):
    """Initialise the base classes, open a cursor and subscribe to "fasta".

    Args:
        logging: passed to ``phamServlet.__init__`` and ``logger.logger``.
        c: cursor passed to ``phamServlet.__init__``; ``self.c`` is then
            replaced by a new cursor built from *opts*.
        server: stored on ``self.server``.
        database: stored on ``self.database``.
        opts: mapping providing 'user', 'password', 'server' and 'database'.
    """
    Thread.__init__(self)
    phamServlet.__init__(self, logging, c)
    Subscriber.__init__(self)
    connection = db_conf.db_conf(
        username=opts['user'],
        password=opts['password'],
        server=opts['server'],
        db=opts['database'],
    )
    self.c = connection.get_cursor()
    self.server, self.database = server, database
    self.subscribe("fasta")
    self.lastAccessed = time.time()
    # random wait between 5 and 15, chosen once per servlet
    self.waitTime = random.randint(5,15)
    self.busy = False
    self._logger = logger.logger(logging)
    self.status = 'avail'
def __init__(self, logging, c, server, database, opts):
    """Initialise the base classes, open a cursor and subscribe to "clustalw".

    Args:
        logging: passed to ``phamServlet.__init__`` and ``logger.logger``.
        c: cursor passed to ``phamServlet.__init__``; ``self.c`` is then
            replaced by a new cursor built from *opts*.
        server: stored on ``self.server``.
        database: stored on ``self.database``.
        opts: mapping providing 'user', 'password', 'server' and 'database'.
    """
    Thread.__init__(self)
    phamServlet.__init__(self, logging, c)
    Subscriber.__init__(self)
    #self.setDaemon(1)
    self.server, self.database = server, database
    credentials = dict(username=opts['user'], password=opts['password'],
                       server=opts['server'], db=opts['database'])
    self.c = db_conf.db_conf(**credentials).get_cursor()
    self.subscribe("clustalw")
    self._logger = logger.logger(logging)
    self.publisher = phamPublisher()
def main(): opts = options(sys.argv[1:]).argDict username, password, database, server, nsname = opts['user'], opts['password'], opts['database'], opts['server'], opts['nsname'] alignment_type = opts['alignment_type'] print 'username :'******'password :'******'server :', server print 'database :', database if opts['nsname']: Pyro.config.PYRO_NS_HOSTNAME=opts['nsname'] nss=NameServer() nss.start() nss.waitUntilStarted() # wait until the NS has fully started. ess=EventServer() ess.start() ess.waitUntilStarted() # wait until the ES has fully started. server_instances = int(opts['instances']) logging = opts['logging'] daemon=Pyro.core.Daemon(host=nsname) ns=Pyro.naming.NameServerLocator().getNS(host=nsname) daemon.useNameServer(ns) _logger = logger.logger(logging) c = db_conf.db_conf(username=username, password=password, server=opts['server'], db=database).get_cursor() try: c.execute("SET SESSION wait_timeout=2629740;") except: self.show_sql_errors(c) csrCursor = db_conf.db_conf(username=username, password=password, server=opts['server'], db=database).get_cursor() try: csrCursor.execute("SET SESSION wait_timeout=2629740;") except: self.show_sql_errors(csrCursor) csr = checkStaleRows(logging, csrCursor) csr.start() pServer = phamServer(daemon, server_instances, alignment_type, logging, c, username, password, server, database, opts) # run the Pyro loop try: daemon.requestLoop() # if Cntl-C pressed, exit cleanly except (KeyboardInterrupt, SystemExit): pServer.shutdown() _logger.log('waiting for all threads to exit')
def get_fasta_data(self):
    '''returns a dictionary: key 'data' contains a string, key 'md5' contains its hexdigest

    The string holds one FASTA record ("\\n>GeneID\\ntranslation") for every
    row of the gene table; it is empty when the table has no rows.
    '''
    import hashlib

    c = db_conf.db_conf().get_cursor()
    c.execute("""SELECT GeneID, translation FROM gene""")
    results = c.fetchall()
    # build the text in one pass; the original started from None, which
    # cannot be concatenated with a string
    fasta_data = ''.join(['\n>' + GeneID + '\n' + translation
                          for GeneID, translation in results])
    fastaDB = {}
    fastaDB['data'] = fasta_data
    fastaDB['md5'] = hashlib.md5(fasta_data).hexdigest()
    return fastaDB
def __init__(self, daemon):
    """Set up logging, disable autocommit, create the servers and start listening.

    Args:
        daemon: Pyro daemon the ``serverSelector`` is connected to under the
            name "serverSelector".
    """
    self._logger = logger()
    log = self._logger.log
    if Pyro.config.PYRO_MULTITHREADED:
        log('Pyro server running in multithreaded mode')

    cursor = db_conf.db_conf().get_cursor()
    try:
        cursor.execute("SET AUTOCOMMIT = 0")
    except:
        # NOTE(review): other methods call show_sql_errors (plural) - confirm
        # which spelling this class defines
        self.show_sql_error(cursor)
    self.reset_stale_rows()

    self.daemon = daemon
    self.servers = []
    self.servSel = serverSelector(self.daemon)
    # server_instances is not a parameter; it is read from the enclosing scope
    self.servSel.create_servers(server_instances)
    log('Registering serverSelector.')
    uri = self.daemon.connect(self.servSel, "serverSelector")
    checkStaleRows().start()
    log('Startup complete.  Listening for client connections...')
def __init__(self, logging, c, server, database, opts):
    """Initialise the base classes, open a cursor and subscribe to "fasta".

    Args:
        logging: passed to ``phamServlet.__init__`` and ``logger.logger``.
        c: cursor passed to ``phamServlet.__init__``; ``self.c`` is then
            replaced by a new cursor built from *opts*.
        server: stored on ``self.server``.
        database: stored on ``self.database``.
        opts: mapping providing 'user', 'password', 'server' and 'database'.
    """
    Thread.__init__(self)
    phamServlet.__init__(self, logging, c)
    Subscriber.__init__(self)
    credentials = dict(username=opts['user'], password=opts['password'],
                       server=opts['server'], db=opts['database'])
    self.c = db_conf.db_conf(**credentials).get_cursor()
    self.server, self.database = server, database
    self.subscribe("fasta")
    self.lastAccessed = time.time()
    self.waitTime = random.randint(5, 15)
    self.busy = False
    self._logger = logger.logger(logging)
    self.status = 'avail'
def get_relatives(self, GeneID, alignmentType='both', blastThreshold=None, clustalwThreshold=None):
    """Return ``phamerator_manage_db.get_relatives`` for *GeneID*.

    Opens a new cursor from the stored connection settings, stores it on
    ``self.c`` and rebuilds ``self.db`` before delegating.  The alignment
    type and both thresholds are passed through unchanged.
    """
    connection = db_conf.db_conf(username=self.username,
                                 password=self.password,
                                 server=self.server,
                                 db=self.database)
    self.c = connection.get_cursor()
    self.db = pham.db(c=self.c)
    options = dict(alignmentType=alignmentType,
                   blastThreshold=blastThreshold,
                   clustalwThreshold=clustalwThreshold)
    return phamerator_manage_db.get_relatives(self.c, GeneID, **options)
def main(argv): phamC = PhamCircle("test","") print "hello main" window = gtk.Window() window.set_default_size(800, 800) window.show() #window.connect("delete_event", on_delete_event) scrolled_win = gtk.ScrolledWindow() scrolled_win.set_shadow_type(gtk.SHADOW_IN) scrolled_win.show() window.add(scrolled_win) phamName = "11" c = db_conf.db_conf(username='******',password='******',server='localhost',db='SEA').get_cursor() GeneIDs = get_members_of_pham(c, phamName) if (True): memberPhages, nonMemberPhages = [], [] for GeneID in GeneIDs: PhageID = get_PhageID_from_GeneID(c, GeneID) if PhageID not in memberPhages: memberPhages.append(PhageID) totalPhages = get_PhageIDs(c) for p in totalPhages: if p not in memberPhages: nonMemberPhages.append(get_phage_name_from_PhageID(c, p)) l = [] genes = [] genes = GeneIDs for a in GeneIDs: clustalwScores, blastScores = get_pham_scores(c, a) for cs in clustalwScores: if cs[2] >= 0.325: l.append((cs[0], cs[1], 'clustalw',cs[2])) for bs in blastScores: if bs[2] <= 1e-50: l.append((bs[0], bs[1], 'blast',bs[2])) phamCircle = PhamCircle(phamName, c) adjustment = 0.325 phamCircleCanvas = goocanvas.Canvas() scrolled_win.add(phamCircleCanvas) phamCircleCanvas.set_root_item_model(phamCircle.create_canvas_model(nonMemberPhages, genes, l,adjustment,'27.0',blastColor='#ff0000', clustalwColor='#0000ff')) x, y = (800, 800) phamCircleCanvas.set_size_request(x, y) defaultPhamCircleCanvasSize = (x, y) phamCircleCanvas.show() window.window.set_cursor(None) gtk.main()
def main(): opts = options(sys.argv[1:]).argDict username, password, database, server = opts['user'], opts[ 'password'], opts['database'], opts['server'] table = opts['alignment_type'] db = opts['database'] poll = int(opts['refresh']) c = db_conf.db_conf(username=username, password=password, server=server, db=database).get_cursor() c.execute("SELECT COUNT(*) FROM %s.gene" % db) total = int(c.fetchone()[0]) #print "total:", total pbar = os.popen( "zenity --progress --auto-close --title=\"%s progress\" --text=\"\"" % table, "w", 0) timer = poll while 1: if timer == poll: c.execute("SELECT COUNT(*) FROM %s.gene WHERE %s_status = 'done'" % (db, table)) count = int(c.fetchone()[0]) c.execute("COMMIT") p = float(count) / total * 100 percent = "%.02f" % p pbar.write(str(percent) + '\n') timer = 0 refresh = str(abs((int(timer) - int(poll)))) if count != total: pbar.write('#' + str(count) + '/' + str(total) + ' genes processed. ' + table + ' is ' + str(percent) + '% completed. Refreshing in ' + refresh + ' seconds...' + '\n') timer = timer + 1 time.sleep(1) else: print '%s processing is complete.' % table sys.exit()
def report_scores(self, id_score, server, hostname): start = time.time() c = db_conf.db_conf().get_cursor() self.lastAccessed = time.time() self._logger.log('receiving scores from ' + hostname + ' using ' + server) for item in id_score: id, score = item if score < 0.0 or score > 1.0: # maybe raise an exception? self._logger.log('error inserting row') cont = None while cont not in ['Y', 'y', 'N', 'n']: cont = raw_input('continue? (y/n)') if cont in ['N', 'n']: self._logger.log('Exiting per user request.') sys.exit() else: # add the alignment score to the database #try: c.execute("""LOCK TABLES node WRITE, scores WRITE""") #except: self.show_sql_errors(c) c.execute("""SELECT id FROM node WHERE hostname = '%s' FOR UPDATE""" % hostname) node_id = c.fetchone() if node_id: node_id = int(node_id[0]) else: print hostname, 'has no node_id but should\n' while cont not in ['Y', 'y', 'N', 'n']: cont = raw_input('continue? (y/n)') if cont in ['N', 'n']: self._logger.log('Exiting per user request.') sys.exit() try: c.execute("""UPDATE scores SET score = ROUND(%s,4), status = 'done', node_id = '%s' WHERE id = %s""" % (score, node_id, id)) except: self.show_sql_error(c) self._logger.log('id: ' + id + ' score: ' + score + ' node_id: ' + node_id) try: c.execute("""COMMIT""") except: sql_show_errors(c) self._logger.log(hostname + ' --> report_scores: ' + str(time.time() - start))
def create_genome_map(self, phages): self.c = db_conf.db_conf(username=self.username, password=self.password, server=self.server, db=self.database).get_cursor() self.db = pham.db(c=self.c) lengths = [] for phage in phages: length = phamerator_manage_db.get_length_of_genome( self.c, phage["PhageID"]) lengths.append(length) length = max(lengths) self.canvas = goocanvas.Canvas() canvasInterface = cartographer.CanvasInterface(self.c, self.canvas) self.canvas.set_root_item_model( canvasInterface.create_canvas_model(phages, length)) zoomFactor = 20.0 self.canvas.set_bounds(0, 0, length / zoomFactor, 100000) needs_new = True while needs_new == True: name = "/tmp/phageSOAP/tmp" + str( hash(str(random.randint(1, 64563)) + str(hash(self.canvas)))) if os.path.exists(name): needs_new = True else: needs_new = False self.current_genome_map = name surface = cairo.SVGSurface(self.current_genome_map, (length / zoomFactor) + 72, 5 * 72 * len(phages)) cr = cairo.Context(surface) cr.translate(36, 130) self.canvas.render(cr, None, 0.1) cr.show_page() print "\n>genome map generated\n" return "genome map generated\n"
#!/usr/bin/env python import db_conf import getpass from phamerator_manage_db import * password = getpass.getpass() c = db_conf.db_conf(username='******', password=password, server='djs-bio.bio.pitt.edu', db='Hatfull').get_cursor() phages = get_phages(c, name=True) for p in phages: print p, str(get_percent_GC(c, name=p)) + '%', get_genome_length( c, name=p), get_number_of_genes(c, name=p)
def __init__(self, alignment_type):
    """Group every gene in the ``gene`` table into phams.

    alignment_type -- 'clustalw' or 'blast'; selects which score table is
                      queried for alignments involving each gene.

    Thresholds are read from the command line: sys.argv[1] is the BLAST
    threshold (scores <= it qualify) and sys.argv[2] the ClustalW
    threshold (scores >= it qualify).

    The result is ``self.phams``: a dict whose keys are integer pham
    names and whose values are lists of member GeneIDs.
    """
    blast_threshold = float(sys.argv[1])
    clustal_threshold = float(sys.argv[2])
    db = db_conf.db_conf()
    c = db.get_cursor()

    # dict whose keys are pham names. Each value is a list of genes that
    # are a member of that particular pham
    self.phams = {}

    try:
        c.execute("""SELECT GeneID FROM gene""")
    except Exception:
        self.show_sql_errors(c)
    GeneIDs = c.fetchall()

    # for every gene in the 'gene' table
    for row in GeneIDs:
        GeneID = row[0]
        added = False

        # get all the alignments for that gene with a good score
        if alignment_type == 'clustalw':
            c.execute(
                """SELECT query, subject, score FROM query_subject, clustalw WHERE ((query = '%s' OR subject = '%s') AND query_subject.id = clustalw.id AND score >= %s)"""
                % (GeneID, GeneID, clustal_threshold))
        elif alignment_type == 'blast':
            c.execute(
                """SELECT query, subject, score FROM query_subject, blast WHERE ((query = '%s' OR subject = '%s') AND blast.score <= %s AND blast.id = query_subject.id) UNION SELECT query, subject, score FROM subject_query, blast WHERE ((query = '%s' OR subject = '%s') AND blast.score <= %s AND blast.id = subject_query.id)"""
                % (GeneID, GeneID, blast_threshold,
                   GeneID, GeneID, blast_threshold))
        else:
            # NOTE(review): an unknown alignment_type only prints this
            # message and processing continues, as in the original.
            print('usage: phamBuilder.py {blast|clustalw} threshold_score')
        alignments = c.fetchall()

        # for each good alignment, look at every pham
        for query, subject, score in alignments:
            for members in self.phams.values():
                if GeneID in members:
                    continue
                # join the pham if the other gene of this alignment
                # (query or subject) is already a member
                partner_is_member = (
                    (GeneID != query and query in members) or
                    (GeneID != subject and subject in members))
                if partner_is_member:
                    members.append(GeneID)
                    added = True

        if not added:
            # Found a new pham. The old code sorted a temporary copy of
            # the keys and then took the last element of another,
            # unsorted copy; max() gives the intended "largest key + 1"
            # and can never collide with an existing key.
            if self.phams:
                key = max(self.phams) + 1
            else:
                key = 0
            self.phams[key] = [GeneID]
firstPham = pham # if this gene is in a different pham else: #if firstPham == 12: print 'adding ' + str(len(self.phams[pham])) + ' genes from pham', pham, 'to pham', firstPham, 'because', GeneID, 'in both' # add all the genes from this pham to the other pham, for gene in self.phams[pham]: if gene not in self.phams[firstPham]: self.phams[firstPham].append(gene) # finally, delete this pham del self.phams[pham] changed = True return changed pB = phamBuilder('blast') c = db_conf.db_conf().get_cursor() while 1: changed = pB.consolidate_phams() if not changed: break pC = phamBuilder('clustalw') while 1: changed = pC.consolidate_phams() if not changed: break comparator = pham_comparator.pham_comparator() if sys.argv[3] == 'cc': comparator.compare(pC, pC, sys.argv[3]) elif sys.argv[3] == 'bb': comparator.compare(pB, pB, sys.argv[3]) elif sys.argv[3] == 'bc':
#!/usr/bin/env python
import MySQLdb
import db_conf

# Seed blast_scores with one (query, subject) row for every ordered pair
# of distinct GeneIDs found in the gene table.
c = db_conf.db_conf().get_cursor()
c.execute("""SELECT GeneID FROM gene""")
querys = c.fetchall()
subjects = querys

for q in querys:
    query = q[0]
    for s in subjects:
        subject = s[0]
        if query == subject:
            continue
        try:
            # Let the driver quote the values rather than formatting them
            # into the statement text.
            c.execute(
                """INSERT INTO blast_scores (query, subject) VALUES (%s, %s)""",
                (query, subject))
        except (MySQLdb.Error, MySQLdb.Warning):
            # Only database failures are reported; a bare except here
            # also swallowed KeyboardInterrupt and programming errors.
            c.execute("SHOW WARNINGS")
            errors = c.fetchall()
            for error in errors:
                print(error)
usage() argDict = {} for opt, arg in opts: if opt in ("-h", "--help"): usage() sys.exit() elif opt in ("-p", "--password"): argDict['password'] = getpass.getpass("database password: "******"-u", "--user"): argDict['username'] = arg elif opt in ("-d", "--database"): argDict['database'] = arg elif opt in ("-s", "--server"): argDict['server'] = arg return argDict argDict = get_options(sys.argv[1:]) c = db_conf.db_conf(username=argDict['username'], password = argDict['password'], server=argDict['server'], db=argDict['database']).get_cursor() c.execute("SELECT PhageID, name FROM phage") results = c.fetchall() for PhageID, name in results: print PhageID, name if name.find(' ') > -1: name = name.split(' ') print name[-1] c.execute("UPDATE phage SET name = '%s' WHERE PhageID = '%s'" % (name[-1], PhageID)) c.execute("COMMIT")
def get_fasta_from_pham(self, phamName):
    """Open a fresh cursor and delegate to
    phamerator_manage_db.get_fasta_from_pham for ``phamName``.

    Side effect: replaces ``self.c`` and ``self.db``.
    """
    connection = db_conf.db_conf(
        username=self.username,
        password=self.password,
        server=self.server,
        db=self.database,
    )
    self.c = connection.get_cursor()
    self.db = pham.db(c=self.c)
    fasta = phamerator_manage_db.get_fasta_from_pham(self.c, phamName)
    return fasta
def get_gene_start_stop_length_orientation_from_GeneID(self, GeneID):
    """Open a fresh cursor and delegate to the phamerator_manage_db
    function of the same name for ``GeneID``.

    Side effect: replaces ``self.c`` and ``self.db``.
    """
    connection = db_conf.db_conf(
        username=self.username,
        password=self.password,
        server=self.server,
        db=self.database,
    )
    self.c = connection.get_cursor()
    self.db = pham.db(c=self.c)
    lookup = phamerator_manage_db.get_gene_start_stop_length_orientation_from_GeneID
    return lookup(self.c, GeneID)
def get_relatives(self, GeneID, alignmentType='both', blastThreshold=None, clustalwThreshold=None):
    """Open a fresh cursor and delegate to
    phamerator_manage_db.get_relatives, forwarding every argument.

    Side effect: replaces ``self.c`` and ``self.db``.
    """
    connection = db_conf.db_conf(
        username=self.username,
        password=self.password,
        server=self.server,
        db=self.database,
    )
    self.c = connection.get_cursor()
    self.db = pham.db(c=self.c)
    relatives = phamerator_manage_db.get_relatives(
        self.c,
        GeneID,
        alignmentType=alignmentType,
        blastThreshold=blastThreshold,
        clustalwThreshold=clustalwThreshold,
    )
    return relatives
def main():
    """Re-verify the phams stored in the database, then add new ones.

    Steps, in order:
      1. Read connection settings and thresholds from the command line,
         prompting for a user name and/or password when needed.
      2. Rebuild phams for genes that already belong to a pham, ignoring
         unassigned genes, join phams that share a gene, subtract the
         phams already in the database and save the remainder (the
         "split" phams).
      3. Rebuild phams for all genes, subtract the phams in the database
         and save the new ones.
    """
    argDict = get_options(sys.argv[1:])

    # NOTE(review): the two prompts below were reconstructed; the text in
    # the reviewed copy had been destroyed by credential scrubbing.
    username = argDict.get('user')
    if not username:
        username = raw_input('database username: ')
    if "password" in argDict:
        password = getpass.getpass('database password: ')
    else:
        password = ''

    database = argDict['database']
    server = argDict['server']
    cthreshold = argDict['clustalw_threshold']
    bthreshold = argDict['blast_threshold']

    c = db_conf.db_conf(username=username, password=password,
                        server=server, db=database).get_cursor()
    # Result unused here; call kept in case constructing it has side effects.
    db = pham.db(c)
    GeneIDs = get_GeneIDs(c)

    # get all the phams that are in the database
    oldController = pham.PhamController(c, source='db')
    # create an empty PhamController object that will be populated with
    # phams for pre-existing genes only; this checks that these phams are
    # still valid based on new BLAST scores
    currentController = pham.PhamController(c)

    new_genes = [str(GeneID) for GeneID in GeneIDs
                 if not oldController.find_phams_with_gene(str(GeneID))]
    print('there are %s genes that are not assigned to a pham' % len(new_genes))
    print('ignoring these and verifying the old phams...')

    # Make sure that the existing phams in the database are still valid
    # based on the current alignment scores
    for GeneID in GeneIDs:
        # new_genes holds str(GeneID) values, so compare like with like.
        if str(GeneID) in new_genes:
            continue
        relatives = follow_rel_chain2(c, GeneID, [], ignore=new_genes,
                                      cthreshold=cthreshold,
                                      bthreshold=bthreshold)
        p = pham.Pham(name=None, members=[], children=[])
        p.add_members((GeneID, ))
        p.add_members(relatives)
        print('created pham')
        try:
            currentController.add_pham(p)
        except pham.DuplicatePhamError:
            pass

    # Look for phams that should be joined because they each contain the
    # same gene. Join any that are found.
    for GeneID in GeneIDs:
        pwg = currentController.find_phams_with_gene(str(GeneID))  # phams with gene
        if len(pwg) > 1:
            currentController.join_phams(pwg)

    print("there are %s total phams when ignoring new genes"
          % len(currentController.phams))
    currentController = currentController - oldController
    print("there are %s split phams that need to be phixed"
          % len(currentController.phams))
    currentController.save()

    # create an empty PhamController object that will be populated with
    # phams for new and pre-existing genes; the pre-existing phams will be
    # subtracted out of this object's pham list
    newController = pham.PhamController(c)
    oldController = pham.PhamController(c, source='db')

    # for every gene in the database, figure out if it is already in a pham
    for GeneID in GeneIDs:
        relatives = follow_rel_chain2(c, GeneID, [], cthreshold=cthreshold,
                                      bthreshold=bthreshold)
        pwg = newController.find_phams_with_gene(str(GeneID))  # phams with gene
        if pwg:
            if len(pwg) == 1:
                # one (and only one) pham already contains this gene
                p = pwg[0]
            else:
                # the gene is in more than one pham: join those phams
                p = newController.join_phams(pwg)
        else:
            # this gene isn't in a pham already, create a new pham
            p = pham.Pham(name=None, members=[], children=[])
            p.add_members((GeneID, ))

        # add any genes related to this gene to the pham that it's in
        p.add_members(relatives)

        toJoin = [p]
        for gene in p.members:
            for result in newController.find_phams_with_gene(gene):
                if result not in toJoin:
                    toJoin.append(result)
        if len(toJoin) > 1:
            p = newController.join_phams(toJoin)

        try:
            newController.add_pham(p)
        except pham.DuplicatePhamError:
            # Was ``except: pham.DuplicatePhamError`` -- a bare except
            # whose body was a no-op expression, hiding every error.
            pass

    newController = newController - oldController
    print("there are %s new phams." % len(newController.phams))
    for newPham in newController.phams:
        print(newPham)
    print("Saving...")
    newController.save()
def __init__(self):
    """Store a cursor obtained from the default db_conf configuration."""
    configuration = db_conf.db_conf()
    self.c = configuration.get_cursor()
def get_GeneIDs(self, type=None, PhageID=None):
    """Open a fresh cursor and delegate to
    phamerator_manage_db.get_GeneIDs, forwarding ``type`` and ``PhageID``.

    Side effect: replaces ``self.c`` and ``self.db``.
    """
    connection = db_conf.db_conf(
        username=self.username,
        password=self.password,
        server=self.server,
        db=self.database,
    )
    self.c = connection.get_cursor()
    self.db = pham.db(c=self.c)
    gene_ids = phamerator_manage_db.get_GeneIDs(self.c, type=type, PhageID=PhageID)
    return gene_ids
def get_seq_from_GeneID(self, GeneID, extra=None):
    """Open a fresh cursor and delegate to
    phamerator_manage_db.get_seq_from_GeneID, forwarding ``extra``.

    Side effect: replaces ``self.c`` and ``self.db``.
    """
    connection = db_conf.db_conf(
        username=self.username,
        password=self.password,
        server=self.server,
        db=self.database,
    )
    self.c = connection.get_cursor()
    self.db = pham.db(c=self.c)
    sequence = phamerator_manage_db.get_seq_from_GeneID(self.c, GeneID, extra=extra)
    return sequence
def get_number_of_pham_members(self, phamName, PhageID=None):
    """Open a fresh cursor and delegate to
    phamerator_manage_db.get_number_of_pham_members.

    Side effect: replaces ``self.c`` and ``self.db``.
    """
    connection = db_conf.db_conf(
        username=self.username,
        password=self.password,
        server=self.server,
        db=self.database,
    )
    self.c = connection.get_cursor()
    self.db = pham.db(c=self.c)
    member_count = phamerator_manage_db.get_number_of_pham_members(
        self.c, phamName, PhageID=PhageID)
    return member_count
def get_scores(self, query, subject):
    """Open a fresh cursor and delegate to
    phamerator_manage_db.get_scores for the ``query``/``subject`` pair.

    Side effect: replaces ``self.c`` and ``self.db``.
    """
    connection = db_conf.db_conf(
        username=self.username,
        password=self.password,
        server=self.server,
        db=self.database,
    )
    self.c = connection.get_cursor()
    self.db = pham.db(c=self.c)
    scores = phamerator_manage_db.get_scores(self.c, query, subject)
    return scores
def get_genes_from_PhageID(self, PhageID):
    """Open a fresh cursor and delegate to
    phamerator_manage_db.get_genes_from_PhageID for ``PhageID``.

    Side effect: replaces ``self.c`` and ``self.db``.
    """
    connection = db_conf.db_conf(
        username=self.username,
        password=self.password,
        server=self.server,
        db=self.database,
    )
    self.c = connection.get_cursor()
    self.db = pham.db(c=self.c)
    genes = phamerator_manage_db.get_genes_from_PhageID(self.c, PhageID)
    return genes
#!/usr/bin/env python2.5 import db_conf from phamerator_manage_db import * from getpass import getpass username = '******' password = getpass() server = 'djs-bio.bio.pitt.edu' db = 'Hatfull' c = db_conf.db_conf(username='******',password=password,server=server,db=db).get_cursor() pham_t = get_phams(c) # keys are pham name, values are # of members # like this ... 13L : 2 phams = {} for pham in pham_t: if phams.has_key(pham[0]): phams[pham[0]] += 1 else: phams[pham[0]] = 1 keys = phams.keys() keys.sort() #for key in keys: # print key, phams[key] phams_keyed_by_size = {} for pham_name in phams.keys():
def get_all_scores(self, alignmentType='both'):
    """Open a fresh cursor and delegate to
    phamerator_manage_db.get_all_scores, forwarding ``alignmentType``.

    Side effect: replaces ``self.c`` and ``self.db``.
    """
    connection = db_conf.db_conf(
        username=self.username,
        password=self.password,
        server=self.server,
        db=self.database,
    )
    self.c = connection.get_cursor()
    self.db = pham.db(c=self.c)
    all_scores = phamerator_manage_db.get_all_scores(self.c, alignmentType=alignmentType)
    return all_scores
def create_pham_circle(self, phamName, alignmentColor, adjustment, radius):
    """Render the pham circle for ``phamName`` to an SVG in the cache dir.

    The cache file name is the concatenation of the four arguments. If a
    file of that name already exists in ``self.cachedir`` nothing is
    rendered. In both cases ``self.current_phamCircle`` is set to the
    file's path and a status string is printed and returned.

    alignmentColor -- when equal to True, blast and clustalw alignments
                      get fixed colours; otherwise one colour is read
                      from the ``pham_color`` table.
    adjustment, radius -- forwarded to the PhamCircle drawing code.
    """
    connection = db_conf.db_conf(
        username=self.username,
        password=self.password,
        server=self.server,
        db=self.database,
    )
    self.c = connection.get_cursor()
    self.db = pham.db(c=self.c)

    filename = "".join(
        [str(phamName), str(alignmentColor), str(adjustment), str(radius)])

    # Cache hit: reuse the existing file and skip all rendering.
    if filename in os.listdir(self.cachedir):
        self.current_phamCircle = self.cachedir + filename
        string = "phamCircle " + str(
            phamName) + " found in cache, using cached version\n"
        print(">" + string)
        return string

    self.radius = radius
    self.alignmentColor = alignmentColor
    self.threshold = 0.70
    self.phamCircleCanvas = goocanvas.Canvas()

    GeneIDs = phamerator_manage_db.get_members_of_pham(self.c, phamName)

    # Phages that contribute at least one gene to this pham.
    memberPhages = []
    for GeneID in GeneIDs:
        owner = phamerator_manage_db.get_PhageID_from_GeneID(self.c, GeneID)
        if owner not in memberPhages:
            memberPhages.append(owner)

    # Names of all remaining phages.
    totalPhages = phamerator_manage_db.get_PhageIDs(self.c)
    self.nonMemberPhages = [
        phamerator_manage_db.get_phage_name_from_PhageID(self.c, phage_id)
        for phage_id in totalPhages
        if phage_id not in memberPhages
    ]

    # Collect the genes to draw and every alignment that passes the cutoffs.
    self.l = []
    self.genes = []
    for first in GeneIDs:
        for second in GeneIDs:
            if first == second:
                continue
            for gene in (first, second):
                if gene not in self.genes:
                    self.genes.append(gene)
            clustalwScore, blastScore = phamerator_manage_db.get_scores(
                self.c, first, second)
            if clustalwScore >= 0.275:
                self.l.append((first, second, 'clustalw', clustalwScore))
            if blastScore and blastScore <= 0.0001:
                self.l.append((first, second, 'blast', blastScore))

    self.phamCircle = PhamDisplay.PhamCircle(phamName, self.c,
                                             verbose=True,
                                             radius=self.radius)

    if self.alignmentColor == True:
        model = self.phamCircle.create_canvas_model(
            self.nonMemberPhages, self.genes, self.l, adjustment,
            self.threshold, blastColor='#ff0000', clustalwColor='#0000ff')
        self.phamCircleCanvas.set_root_item_model(model)
    else:
        phamColorFromDataBase = self.db.select(
            'pham_color', 'color', name=phamName)[0][0]
        model = self.phamCircle.create_canvas_model(
            self.nonMemberPhages, self.genes, self.l, adjustment,
            self.threshold, allColor=phamColorFromDataBase)
        self.phamCircleCanvas.set_root_item_model(model)

    self.current_phamCircle = self.cachedir + filename
    self.phamCircleCanvas.set_bounds(0, 0, 10000, 10000)
    side = 15 * 72
    surface = cairo.SVGSurface(self.current_phamCircle, side, side)
    context = cairo.Context(surface)
    context.translate(10, 0)
    self.phamCircleCanvas.render(context, None, 0.1)
    context.show_page()

    string = "phamCircle " + str(phamName) + " generated\n"
    print(">" + string)
    return string