def process_model_file(args, dbcheckhash, oper, minup_version, model_file): #print "Processing model file..." #sys.stdout.flush() model_kmers = dict() with open(model_file, 'rb') as csv_file: reader = csv.reader(csv_file, delimiter="\t") d = list(reader) #n = len(d) #bar = mk_bar(n) #bar.start() for r in range(0, len(d)): # bar.update(r) #print r kmer = d[r][0] #print kmer mean = d[r][1] # args.model_index] #print type(mean) try: if (float(mean) <= 5): print "Looks like you have a poorly formatted model file. These aren't the means you are looking for.\n" print "The value supplied for " + kmer + " was " + str( mean) terminate_minup(args, dbcheckhash, oper, minup_version) except Exception, err: print "Problem with means - but it isn't terminal - we assume this is the header line!" #if (args.verbose is True): print kmer, mean model_kmers[kmer] = mean
def __init__(self, dbcheckhash, oper, db, args, xml_file_dict, check_read_args, minup_version, bwaclassrunner):
    """Set up the file-watching handler: scan the watch directory, create
    the worker pool, start the background processfiles() thread, and (when
    bwa alignment with a reference is requested) pre-compute kmer hashes
    for the template and complement models.

    NOTE(review): the attribute assignments below must complete before the
    processfiles() thread is started only in principle -- the thread is in
    fact started BEFORE kmerhashT/kmerhashC are filled in, so early reads
    of those dicts by the worker see the empty placeholders; confirm this
    race is intentional.
    """
    # Initial scan of the watch directory; also updates xml_file_dict.
    self.creates, xml_file_dict = \
        file_dict_of_folder(args, xml_file_dict, args.watchdir)
    self.processed = dict()     # fast5 files already handled
    self.running = True         # loop flag read by processfiles()
    self.rawcount = dict()
    self.rawprocessed = dict()
    # Worker pool sized from the command line, not from cpu_count().
    self.p = multiprocessing.Pool(args.procs)
    # self.p = multiprocessing.Pool(multiprocessing.cpu_count())
    self.kmerhashT = dict()     # template kmer hash (filled below if preproc)
    self.kmerhashC = dict()     # complement kmer hash
    self.args = args
    self.oper = oper
    self.db = db
    self.check_read_args = check_read_args
    self.xml_file_dict = xml_file_dict
    self.minup_version = minup_version
    self.hdf = ''
    self.bwaclassrunner = bwaclassrunner
    '''
    print "Sorting files by timestamps...."
    sys.stdout.flush()
    self.sortedFiles = sorted(self.creates.items(), key=lambda x: x[1])
    '''
    # Background consumer of self.creates; daemon so it dies with the
    # main process.
    t = threading.Thread(target=self.processfiles)
    t.daemon = True
    try:
        t.start()
    except (KeyboardInterrupt, SystemExit):
        # MS -- Order here is critical ...
        # NOTE(review): threading.Thread has no clear()/stop() methods, so
        # the next two lines would raise AttributeError if this handler
        # ever fired -- it appears to be dead/defensive code; confirm.
        print 'Ctrl-C entered -- exiting'
        t.clear()
        t.stop()
        self.p.close()
        self.p.terminate()
        terminate_minup(args, dbcheckhash, oper, self.minup_version)
        exit_gracefully(args, dbcheckhash, self.minup_version)
        sys.exit(1)
    if args.bwa_align is True and args.ref_fasta is not False:
        fasta_file = args.ref_fasta
        # seqlen maps sequence name -> length (dict returned by get_seq_len).
        seqlen = get_seq_len(fasta_file)
        # print type(seqlen)
        if args.verbose == "high":
            print seqlen
        shortestSeq = np.min(seqlen.values())
        if args.verbose == "high":
            print shortestSeq
        if args.verbose == "high":
            print args.largerRef
        '''
        # DEPRECARTINE LARGE REF MS 11.10.16
        if not args.largerRef and shortestSeq > 10 ** 8:
            if args.verbose == "high":
                print "Length of references is >10^8: processing may be *EXTREMELY* slow. To overide rerun using the '-largerRef' option" # MS
            terminate_minup(args, dbcheckhash, oper, self.minup_version)
        elif not args.largerRef and shortestSeq > 10 ** 7:
            if args.verbose == "high":
                print "Length of references is >10^7: processing may be *VERY* slow.
                To overide rerun using the '-largerRef' option" # MS
            terminate_minup(args, dbcheckhash, oper, self.minup_version)
        else:
            if args.verbose == "high":
                print 'Length of references is <10^7: processing should be ok .... continuing .... ' # MS
        '''
        # model_file = "model.txt"
        # model_kmer_means=process_model_file(model_file)
        if args.preproc is True:
            # and args.prealign is True:
            # Hard-coded model filenames for the template/complement strands.
            model_file_template = \
                'template.model'
            model_file_complement = \
                'complement.model'
            model_kmer_means_template = \
                process_model_file(args, dbcheckhash, oper,
                                   self.minup_version, model_file_template)
            model_kmer_means_complement = \
                process_model_file(args, dbcheckhash, oper,
                                   self.minup_version, model_file_complement)
            # model_kmer_means = retrieve_model()
            # global kmerhash
            # kmerhash = process_ref_fasta_raw(fasta_file,model_kmer_means)
            # Pre-compute per-strand kmer hashes of the reference for the
            # raw/pre-basecall alignment path.
            self.kmerhashT = process_ref_fasta_raw(
                fasta_file, model_kmer_means_template)
            self.kmerhashC = process_ref_fasta_raw(
                fasta_file, model_kmer_means_complement)
def emergencyexit2(signum, frame):
    """Signal handler: stop the filesystem observer, then shut minup down.

    Order matters: the observer is stopped first so no further file events
    are dispatched while terminate_minup() runs.

    NOTE(review): 'observer', 'args', 'dbcheckhash', 'OPER' and
    'MINUP_VERSION' are resolved from the enclosing/global scope -- this
    handler must be registered (via signal.signal) only after they exist.
    """
    print 'stopping monitor....'
    observer.stop()
    terminate_minup(args, dbcheckhash, OPER, MINUP_VERSION)
def processfiles(self):
    """Main polling loop (runs on the daemon thread started in __init__).

    Every 5 seconds: print a status line, sort the cached fast5 files by
    creation time, and hand each file that is at least 20 seconds old and
    not yet processed to do_file_processing().  Runs until self.running is
    cleared or a customup timeout terminates minup.
    """
    args = self.args
    db = self.db
    oper = self.oper
    xml_file_dict = self.xml_file_dict
    # Unpack the bundle prepared by the caller of __init__.
    connection_pool, minup_version, \
        comments, ref_fasta_hash, dbcheckhash, \
        logfolder, cursor = self.check_read_args
    # analyser=RawAnalyser()
    everyten = 0        # files processed since the last interim status line
    customtimeout = 0   # consecutive empty polls (customup mode only)
    # if args.timeout_true is not None:
    #     timeout=args.timeout_true
    ip = startMincontrol(args, cursor, dbcheckhash,
                         minup_version, oper)
    while self.running:
        ts = time.time()
        # Status line; the preproc variant adds raw-file counters.
        if args.preproc is True:
            print datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S'
                ), 'CACHED:', len(self.creates), 'PROCESSED:', \
                len(self.processed), 'RAW FILES:', \
                len(self.rawcount), 'RAW WARPED:', \
                len(self.rawprocessed)
        else:
            print datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S'
                ), 'CACHED:', len(self.creates), 'PROCESSED:', \
                len(self.processed)
        # MS UPDATE SummaryStatsTable ....
        if args.customup is True:
            #print "In customup"
            # In customup mode, give up after >6 consecutive polls (~35s)
            # with nothing cached.
            if len(self.creates) > 0:
                customtimeout = 0
            else:
                customtimeout += 1
            if customtimeout > 6:
                terminate_minup(args, dbcheckhash, oper,
                                self.minup_version)
        '''
        ks = self.creates.keys()
        n = len(ks)
        bar = mk_bar(n)
        bar.start()
        bar.update(10*n/100)
        bar.update(25*n/100)
        bar.update(75*n/100)
        bar.update(100*n/100)
        bar.finish()
        sys.stdout.flush()
        '''
        '''
        if args.verbose is False and args.debug is False:
            #print "Processing files ..."
            #sys.stdout.flush()
            #n = len(sortedFiles)
            #bar = mk_bar(n)
            #bar.start()
            #i=0
        # ??? MS
        if len(self.creates.keys())==0:
            print "No files found."
            terminate_minup(args, dbcheckhash, oper, self.minup_version)
            exit_gracefully(args, dbcheckhash, self.minup_version)
            sys.exit()
        '''
        print "Sorting files by timestamps...."
        sys.stdout.flush()
        # Oldest files first, so reads are uploaded in creation order.
        self.sortedFiles = sorted(self.creates.items(), key=lambda x: x[1])
        metadata_sql_list = []
        for (fast5file, createtime) in self.sortedFiles:
            '''
            #if args.verbose is False and args.debug is False:
                #bar.update(i) # self.processed)
                #i+=1
            '''
            if args.verbose in ["high", "low"]:
                print "Processing: ", fast5file
                print int(createtime), time.time()
            # tn=time.time()
            if int(createtime) + 20 < time.time():
                # file created 20 sec ago, so should be complete ....
                if fast5file not in self.processed.keys():
                    # minoTour Metadata Adding ....
                    # Optional per-run metadata file dropped into the watch
                    # directory; best-effort injection into the fast5.
                    minoTour_meta_file = args.watchdir + os.sep + "minoTour_meta.txt"
                    if args.verbose == "high":
                        print minoTour_meta_file
                    if os.path.isfile(minoTour_meta_file):
                        try:
                            add_metadata_to_hdf(args, minoTour_meta_file,
                                                fast5file)
                        except:
                            print "Adding metadata failed."
                            pass
                    else:
                        # NOTE(review): bare string is a no-op -- almost
                        # certainly a missing 'print'; left as-is.
                        "No minoTour_meta.txt file."
                    sys.stdout.flush()
                    if args.debug is True:
                        # Debug mode: catch, dump diagnostics, and exit on
                        # the first failing file.
                        try:
                            self.do_file_processing(
                                fast5file, db, connection_pool,
                                minup_version, comments, ref_fasta_hash,
                                dbcheckhash, logfolder, cursor,
                                metadata_sql_list, ip)
                        except Exception, err:
                            #if self.hdf: # CI
                            #    self.hdf.close() # CI
                            # print "This is a pre basecalled file"
                            print "MyHandler(): except -- " + fast5file
                            err_string = \
                                'Error with fast5 file: %s : %s' \
                                % (fast5file, err)
                            #print >> sys.stderr, err_string
                            print err_string
                            print "X" * 80
                            debug()
                            sys.exit()
                            #moveFile(args, fast5file)
                            #if args.verbose == "high":
                            #    sys.exit()
                            #return ()
                    else:
                        self.do_file_processing(
                            fast5file, db, connection_pool,
                            minup_version, comments, ref_fasta_hash,
                            dbcheckhash, logfolder, cursor,
                            metadata_sql_list, ip)
                    everyten += 1
                    # Interim status line every 25 files (despite the name).
                    if everyten == 25:
                        tm = time.time()
                        if ts + 5 < tm:
                            # just to stop it printing two status messages one after the other.
                            if args.preproc is True:
                                print datetime.datetime.fromtimestamp(tm).strftime('%Y-%m-%d %H:%M:%S'
                                    ), 'CACHED:', len(self.creates), 'PROCESSED:', \
                                    len(self.processed), 'RAW FILES:', len(self.rawcount), \
                                    'RAW WARPED:', len(self.rawprocessed)
                            else:
                                print datetime.datetime.fromtimestamp(tm).strftime('%Y-%m-%d %H:%M:%S'
                                    ), 'CACHED:', len(self.creates), 'PROCESSED:', \
                                    len(self.processed)
                        everyten = 0
        '''
        if args.verbose is False and args.debug is False:
            #bar.finish()
            print "... finished processing files."
            sys.stdout.flush()
        '''
        time.sleep(5)
def check_read(db, args, connection_pool, minup_version, comments, xml_file_dict, ref_fasta_hash, dbcheckhash, logfolder, filepath, hdf, cursor, oper, ip):
    """Given one fast5 file, work out which per-run database it belongs to
    and make sure that database is active.

    Derives a dbname from the filename, then either (a) switches back to a
    database this minup instance has already seen, or (b) creates a brand
    new database: all tables, reference-sequence records, any ENA XML
    metadata, the Gru.minIONruns registration, viewer users, a log file and
    a MySQL connection pool.  Updates the shared dbcheckhash bookkeeping
    and the module-level 'runindex'.

    NOTE(review): 'hdf' is an open h5py-style File object for 'filepath';
    'cursor'/'db' may be replaced by cursor_execute and are threaded back
    through its return value.
    """
    global runindex
    if args.verbose == "high":
        print "Checking read ..."
        sys.stdout.flush()
    filename = os.path.basename(filepath)
    if args.verbose == "high":
        print time.strftime('%Y-%m-%d %H:%M:%S'), 'processing:', filename
        sys.stdout.flush()
    # Remove "_sequencing_run_" and/or "_mux_scan_" from filename_
    # as used to make dbname ...
    filename_ = filename
    filename_ = filename_.replace("_sequencing_run_", '_')
    filename_ = filename_.replace("_mux_scan_", '_')
    parts = filename_.split('_')
    strSep = '_'
    # Changing the number below enables the removal of the random four digit number from run names on restart
    # (drops the last 5 underscore-separated fields of the filename).
    dbname = strSep.join(parts[0:len(parts) - 5])
    dbname = re.sub('[.!,; ]', '', dbname)
    # Prefix with the minotour username (and optional custom run name).
    if len(args.custom_name) > 0:
        dbname = args.minotourusername + '_' + args.custom_name + '_' \
            + dbname
    else:
        dbname = args.minotourusername + '_' + dbname
    # MySQL database names are capped at 64 chars; avoid a trailing '_'.
    if len(dbname) > 64:
        dbname = dbname[:64]
    if dbname.endswith('_'):  #ml
        dbname = dbname[:-1]  #ml
    if args.verbose == "high":
        print "dbname is ", dbname
        print "Parts were ", parts
        debug()
    # ---------------------------------------------------------------------------
    # Case (a): this run has been seen before in this minup instance --
    # switch back to its database if it is not already the active one.
    if dbname in dbcheckhash['dbname']:
        # so data from this run has been seen before in this instance of minup so switch to it!
        if dbcheckhash['dbname'][dbname] is False:
            if args.verbose == "high":
                print 'switching to database: ', dbname
                sys.stdout.flush()
            sql = 'USE %s' % dbname
            args, db, cursor = cursor_execute(args, db, cursor, sql)
            # ---------------------------------------------------------------------------
            try:
                runindex = dbcheckhash['runindex'][dbname]  # MS ..
            except:
                print "checkRead(): line 112, dbcheckhash, key error: " \
                    + dbname
                sys.stdout.flush()
                #sys.exit()
                return ()
            # Record the switch as a comment against the run in Gru.
            comment_string = 'minUp switched runname'
            start_time = time.strftime('%Y-%m-%d %H:%M:%S')
            sql = \
                "INSERT INTO Gru.comments (runindex,runname,user_name,comment,name,date) VALUES (%s,'%s','%s','%s','%s','%s')" \
                % (
                runindex,
                dbname,
                args.minotourusername,
                comment_string,
                args.minotourusername,
                start_time,
                )
            if args.verbose == "high":
                print sql
                debug()
            db.escape_string(sql)
            args, db, cursor = cursor_execute(args, db, cursor, sql)
            db.commit()
            # ---------------------------------------------------------------------------
            # Mark every known database inactive, then this one active.
            ks = dbcheckhash['dbname'].keys()
            n = len(ks)
            bar = mk_bar(n)
            bar.start()
            for i, e in enumerate(ks):
                bar.update(i)
                dbcheckhash['dbname'][e] = False
            bar.finish()
            dbcheckhash['dbname'][dbname] = True
    # ---------------------------------------------------------------------------
    # Case (b): first time this run is seen -- create and populate a new
    # per-run database plus all associated bookkeeping.
    if dbname not in dbcheckhash[
            'dbname']:  # # so the db has not been seen before.. time to set up lots of things...
        dbcheckhash['barcoded'][dbname] = False
        dbcheckhash['barcode_info'][dbname] = False
        dbcheckhash['logfile'][dbname] = os.path.join(os.path.sep,
                logfolder, dbname + '.minup.log')
        if args.verbose == "high":
            print 'trying database: ', dbname
            sys.stdout.flush()
        sql = "SHOW DATABASES LIKE \'%s\'" % dbname
        # print sql
        args, db, cursor = cursor_execute(args, db, cursor, sql)
        if cursor.fetchone():
            if args.verbose == "high":
                print 'database exists!'
                sys.stdout.flush()
            # # drop the existing database, if selected
            if args.drop_db is True:
                sql = 'DROP DATABASE %s' % dbname
                # print sql
                args, db, cursor = cursor_execute(args, db, cursor, sql)
                db.commit()
                if args.verbose == "high":
                    print 'database dropped.'
                    sys.stdout.flush()
            else:
                # Database exists and -d not given: refuse to overwrite.
                print >> sys.stderr, "=" * 80
                print >> sys.stderr, \
                    'WARNING: DATABASE \"%s\" already EXISTS.\nTo write over the data re-run the minUP command with option -d' % dbname
                print >> sys.stderr, "=" * 80
                sys.stdout.flush()
                if args.batch_fasta == False:
                    # MS next 6 lines ...
                    print >> sys.stderr, \
                        'not in batch mode so exiting ...'
                    sys.stdout.flush()
                    terminate_minup(args, dbcheckhash, oper,
                                    minup_version)
                    #terminate_minup(args, dbcheckhash, oper, minup_version)
                    #sys.exit()
            # Also purge the old run's rows from the central Gru tables.
            if args.drop_db is True:
                print 'Deleting exisiting run from Gru now ...'
                sys.stdout.flush()
                sql = \
                    'DELETE FROM Gru.userrun WHERE runindex IN (SELECT runindex FROM Gru.minIONruns WHERE runname = "%s")' \
                    % dbname
                # print sql
                args, db, cursor = cursor_execute(args, db, cursor, sql)
                db.commit()
                sql = "DELETE FROM Gru.minIONruns WHERE runname = \'%s\'" \
                    % dbname
                # print sql
                args, db, cursor = cursor_execute(args, db, cursor, sql)
                db.commit()
                print '.... Run deleted.'
                sys.stdout.flush()
        # ---------------------------------------------------------------------------
        # -------- This bit adds columns to Gru.minIONruns --------
        modify_gru(args, db, cursor)
        # ---------------------------------------------------------------------------
        # -------- Create a new empty database
        #if args.verbose == "high":
        print 'Making new database: ', dbname
        print '=' * 80
        sys.stdout.flush()
        # Record the current dbname to disk for external tooling.
        dbF = open('dbname.txt', 'w')
        dbF.write(dbname + "\n")
        dbF.close()
        sql = 'CREATE DATABASE %s' % dbname
        #print sql
        args, db, cursor = cursor_execute(args, db, cursor, sql)
        sql = 'USE %s' % dbname
        args, db, cursor = cursor_execute(args, db, cursor, sql)
        # Create Tables ....
        create_general_table('config_general', args, db, cursor)
        create_trackingid_table('tracking_id', args, db, cursor)
        create_basecall_summary_info('basecall_summary', args, db, cursor)
        create_events_model_fastq_table('basecalled_template', args, db,
                                        cursor)
        create_events_model_fastq_table('basecalled_complement', args,
                                        db, cursor)
        create_basecalled2d_fastq_table('basecalled_2d', args, db, cursor)
        # Tables for remote minION control, only when a PIN is supplied.
        if args.pin is not False:
            create_mincontrol_interaction_table('interaction', args, db,
                    cursor)
            create_mincontrol_messages_table('messages', args, db, cursor)
            create_mincontrol_barcode_control_table('barcode_control',
                    args, db, cursor)
        #print "DONE"
        # ---------------------------------------------------------------------------
        '''
        # DEPRECATIN TELEM MS 11.10.16
        if args.telem is True:
            for i in xrange(0, 10):
                temptable = 'caller_basecalled_template_%d' % i
                comptable = 'caller_basecalled_complement_%d' % i
                twod_aligntable = 'caller_basecalled_2d_alignment_%d' \
                    % i
                create_caller_table_noindex(temptable, args, db, cursor)
                create_caller_table_noindex(comptable, args, db, cursor)
                create_2d_alignment_table(twod_aligntable, args, db, cursor)
            create_model_list_table('model_list', args, db, cursor)
            create_model_data_table('model_data', args, db, cursor)
        '''
        # ---------------------------------------------------------------------------
        if args.preproc is True:
            create_pretrackingid_table('pre_tracking_id', args, db,
                    cursor)  # make another table
            create_pre_general_table('pre_config_general', args, db,
                    cursor)  # pre config general table
        # -------- Assign the correct reference fasta for this dbname if applicable
        if args.batch_fasta is not False:
            # Match a reference to this run by shared 'downloads' path.
            for refbasename in ref_fasta_hash.keys():
                common_path = \
                    os.path.commonprefix((ref_fasta_hash[refbasename]['path'
                        ], filepath)).rstrip('\\|\/|re|\\re|\/re')
                if common_path.endswith('downloads'):
                    ref_fasta_hash[dbname] = ref_fasta_hash[refbasename]
                    # del ref_fasta_hash[refbasename]
        if args.ref_fasta is not False:
            for refbasename in ref_fasta_hash.keys(
                    ):  # there should only be one key
                ref_fasta_hash[dbname] = ref_fasta_hash[refbasename]
        # ---------------------------------------------------------------------------
        if dbname in ref_fasta_hash:
            # great, we assigned the reference fasta to this dbname
            create_reference_table('reference_seq_info', args, db, cursor)
            create_5_3_prime_align_tables('last_align_basecalled_template',
                    args, db, cursor)
            create_5_3_prime_align_tables('last_align_basecalled_complement',
                    args, db, cursor)
            create_5_3_prime_align_tables('last_align_basecalled_2d',
                    args, db, cursor)
            if args.last_align is True:
                # create_align_table('last_align_basecalled_template', args, db, cursor)
                # create_align_table('last_align_basecalled_complement', args, db, cursor)
                # create_align_table('last_align_basecalled_2d', args, db, cursor)
                create_align_table_maf('last_align_maf_basecalled_template',
                        args, db, cursor)
                create_align_table_maf('last_align_maf_basecalled_complement',
                        args, db, cursor)
                create_align_table_maf('last_align_maf_basecalled_2d',
                        args, db, cursor)
            if args.bwa_align is True:
                create_align_table_sam('align_sam_basecalled_template',
                        args, db, cursor)
                create_align_table_sam('align_sam_basecalled_complement',
                        args, db, cursor)
                create_align_table_sam('align_sam_basecalled_2d', args,
                        db, cursor)
            # dbcheckhash["mafoutdict"][dbname]=open(dbname+"."+process+".align.maf","w")
            '''
            # DEPRECATIN TELEM MS 11.10.16
            if args.telem is True:
                create_ref_kmer_table('ref_sequence_kmer', args, db, cursor)
            '''
            if args.prealign is True:
                create_pre_align_table('pre_align_template', args, db,
                        cursor)
                create_pre_align_table('pre_align_complement', args, db,
                        cursor)
                create_pre_align_table('pre_align_2d', args, db, cursor)
                create_align_table_raw('last_align_raw_template', args,
                        db, cursor)
                create_align_table_raw('last_align_raw_complement',
                        args, db, cursor)
                create_align_table_raw('last_align_raw_2d', args, db,
                        cursor)
            # Register every reference sequence and remember its row id.
            for refname in ref_fasta_hash[dbname]['seq_len'].iterkeys():
                # print "refname", refname
                reference = ref_fasta_hash[dbname]['seq_file'][refname]
                reflen = ref_fasta_hash[dbname]['seq_len'][refname]
                reflength = ref_fasta_hash[dbname]['seq_file_len'][reference]
                refid = mysql_load_from_hashes(
                    args, db, cursor, 'reference_seq_info', {
                    'refname': refname,
                    'reflen': reflen,
                    'reffile': reference,
                    'ref_total_len': reflength,
                    })
                ref_fasta_hash[dbname]['refid'][refname] = refid
                '''
                # DEPRECATIN TELEM MS 11.10.16
                if args.telem is True:
                    kmers = ref_fasta_hash[dbname]['kmer'][refname]
                    load_ref_kmer_hash(args, db, 'ref_sequence_kmer',
                            kmers, refid, args, db, cursor)
                '''
        # ---------------------------------------------------------------------------
        # -------- See if theres any ENA XML stuff to add.
        # -------- Need to do this now as it changes the "comment"
        # -------- in Gru.minionRuns entry
        # print "C", comment
        ena_flowcell_owner = None
        for xml_to_downloads_path in xml_file_dict.keys():
            # xmlpath=xml_file_dict["study"][study_id]["path"]
            common_path = os.path.commonprefix(
                (xml_to_downloads_path, filepath)).rstrip('\\|\/|re')
            if common_path.endswith('downloads'):
                print 'found XML data for:', dbname
                sys.stdout.flush()
                create_xml_table('XML', args, db, cursor)
                # ---------------------------------------------------------------------------
                # Walk study -> experiment -> sample/run and load each XML
                # document into the new database's XML table.
                downloadsPath = xml_file_dict[xml_to_downloads_path]
                for study_id in \
                        downloadsPath['study'].keys():
                    ena_flowcell_owner = study_id
                    study_xml = \
                        downloadsPath['study'][study_id]['xml']
                    study_file = \
                        downloadsPath['study'][study_id]['file']
                    study_title = \
                        downloadsPath['study'][study_id]['title']
                    study_abstract = \
                        downloadsPath['study'][study_id]['abstract']
                    exp_c = 'NA'
                    samp_c = 'NA'
                    run_c = 'NA'
                    mysql_load_from_hashes(
                        args, db, cursor, 'XML', {
                        'type': 'study',
                        'primary_id': study_id,
                        'filename': study_file,
                        'xml': study_xml,
                        })
                    for exp_id in \
                            downloadsPath['experiment'].keys():
                        if study_id \
                                == downloadsPath['experiment'][exp_id]['study_id']:
                            exp_c = exp_id
                            exp_xml = \
                                downloadsPath['experiment'][exp_id]['xml']
                            exp_file = \
                                downloadsPath['experiment'][exp_id]['file']
                            sample_id = \
                                downloadsPath['experiment'][exp_id]['sample_id']
                            mysql_load_from_hashes(
                                args, db, cursor, 'XML', {
                                'type': 'experiment',
                                'primary_id': exp_id,
                                'filename': exp_file,
                                'xml': exp_xml,
                                })
                            if sample_id \
                                    in downloadsPath['sample']:
                                samp_c = sample_id
                                sample_xml = \
                                    downloadsPath['sample'][sample_id]['xml']
                                sample_file = \
                                    downloadsPath['sample'][sample_id]['file']
                                mysql_load_from_hashes(
                                    args, db, cursor, 'XML', {
                                    'type': 'sample',
                                    'primary_id': sample_id,
                                    'filename': sample_file,
                                    'xml': sample_xml,
                                    })
                            for run_id in \
                                    downloadsPath['run'].keys():
                                if exp_id \
                                        == downloadsPath['run'][run_id]['exp_id']:
                                    run_c = run_id
                                    run_xml = \
                                        downloadsPath['run'][run_id]['xml']
                                    run_file = \
                                        downloadsPath['run'][run_id]['file']
                                    mysql_load_from_hashes(
                                        args, db, cursor, 'XML', {
                                        'type': 'run',
                                        'primary_id': run_id,
                                        'filename': run_file,
                                        'xml': run_xml,
                                        })
                # NOTE(review): uses the loop variables from the last study
                # iterated -- assumes a single study per downloads path.
                comments[dbname] = \
                    'ENA data. Study:%s Title: %s Abstract: %s Experiment:%s Sample:%s Run:%s' \
                    % (
                    study_id,
                    study_title,
                    study_abstract,
                    exp_c,
                    samp_c,
                    run_c,
                    )
        # ---------------------------------------------------------------------------
        # --------- Make entries in the Gru database
        # try and get the right basecall-configuration general
        file_type = check_read_type(args, filepath, hdf)
        #print "FILETYPE is", file_type
        basecalltype = getBasecalltype(args, file_type)  # MS
        basecalldir = ''
        basecalldirconfig = ''
        basecallindexpos = ''  #ML
        '''
        try:
            if file_type == 2:
                basecalltype2="Basecall_2D"
                string2='' #ML
                for x in range (0,9):
                    string2 = '/Analyses/Hairpin_Split_00%s/Configuration/general' % (x) #ML
                    if (string2 in hdf):
                        basecallindexpos=x #ml
                        #print "BASECALLINDEXPOS",basecallindexpos
                        basecalldirconfig=string2 #ML
                    string='/Analyses/%s_00%s/Configuration/general' % (basecalltype, basecallindexpos)
                    #print string
                    if (string in hdf):
                        # print "YES 1"
                        basecalldir='/Analyses/%s_00%s/' % (basecalltype,basecallindexpos)
                        #basecallindexpos=x #ml
                        #break
                    string='/Analyses/%s_00%s/Configuration/general' % (basecalltype2, basecallindexpos)
                    #print string
                    if (string2 in hdf):
                        #print "YES 2"
                        basecalldir='/Analyses/%s_00%s/' % (basecalltype2,basecallindexpos)
                        #basecalldirconfig=string2 #ML
                        #break
        except:
            print "checkReads(): error line 467."
            sys.exit()
        try:
            if file_type in [1,0]:
                basecalltype = 'Basecall_1D_CDNA'
                basecalltype2 = 'Basecall_2D'
                basecalldir = ''
                basecalldirconfig = ''
                basecallindexpos=''
        '''
        # Probe /Analyses/<basecalltype>_000..008 for the first analysis
        # group present in the HDF5 file.
        try:  # MS
            for x in range(0, 9):
                string = '/Analyses/%s_00%s/Configuration/general' \
                    % (basecalltype, x)
                if string in hdf:
                    basecalldir = '/Analyses/%s_00%s/' % (basecalltype,
                            x)
                    basecalldirconfig = string
                    basecallindexpos = x
                    break
                '''
                string = '/Analyses/%s_00%s/Configuration/general' \
                    % (basecalltype2, x)
                if string in hdf:
                    basecalldir = '/Analyses/%s_00%s/' % (basecalltype, x)
                    basecalldirconfig = string
                    basecallindexpos=x
                    break
                '''
            # print "basecalldirconfig", basecalldirconfig
            # # get some data out of tacking_id and general
        except:
            print "checkReads(): error line 496."
            sys.stdout.flush()
            #sys.exit()
        #print basecalldirconfig
        #print basecalldir
        if len(basecalldirconfig) > 0:
            configdata = hdf[basecalldirconfig]
        if len(basecalldir) > 0:
            metrichor_info = hdf[basecalldir]
        # else:
        #    ....configdata.attrs['workflow_name'] ="preanalysed"
        trackingid = hdf['/UniqueGlobalKey/tracking_id']
        print trackingid.attrs['exp_start_time']
        #print dateutil.parser.parse(trackingid.attrs['exp_start_time'])
        #print int(time.mktime(dateutil.parser.parse(trackingid.attrs['exp_start_time']).timetuple()))
        #print datetime.datetime.fromtimestamp(int(time.mktime(dateutil.parser.parse(trackingid.attrs['exp_start_time']).timetuple()))).strftime('%Y-%m-%d')
        # Run date (YYYY-MM-DD) from the experiment start timestamp.
        expstarttimecode = \
            datetime.datetime.fromtimestamp(int(testtime(trackingid.attrs['exp_start_time'
            ]))).strftime('%Y-%m-%d')
        flowcellid = trackingid.attrs['device_id']
        if len(basecalldirconfig) > 0:
            basecalleralg = configdata.attrs['workflow_name']
        else:
            basecalleralg = 'preanalysed'
        if len(basecalldir) > 0:
            #version = metrichor_info.attrs['chimaera version']
            # Attribute name changed between metrichor releases.
            try:
                version = metrichor_info.attrs['chimaera version']  # MS
            except:
                version = metrichor_info.attrs['version']  # MS
        else:
            version = 'unknown'
        runnumber = args.run_num
        flowcellowner = 'NULL'
        username = args.minotourusername
        # ENA study owner (if found above) wins over the CLI flag.
        if args.flowcell_owner is not None:
            flowcellowner = args.flowcell_owner
        if ena_flowcell_owner is not None:
            flowcellowner = ena_flowcell_owner
        # # get info on the reference sequence, if used
        big_reference = 'NOREFERENCE'
        big_reflength = '0'
        if dbname in ref_fasta_hash:
            # so there's some reference data for this dbname
            big_reference = ref_fasta_hash[dbname]['big_name']
            big_reflength = ref_fasta_hash[dbname]['big_len']
        # # make entries into Gru for this new database
        comment = comments['default']
        if dbname in comments:
            comment = comments[dbname]
        process = 'noalign'
        if args.last_align is True:
            process = 'LAST'
        if args.bwa_align is True:
            process = 'BWA'
        wdir = args.watchdir
        if wdir.endswith('\\'):
            # remove trailing slash for windows.
            wdir = wdir[:-1]
        # NOTE(review): SQL built by string interpolation throughout this
        # function -- dbname/comment are sanitised only lightly above;
        # consider parameterised queries.
        sql = \
            "INSERT INTO Gru.minIONruns (date,user_name,flowcellid,runname,activeflag,comment,FlowCellOwner,RunNumber,reference,reflength,basecalleralg,version,minup_version,process,mt_ctrl_flag,watch_dir,host_ip) VALUES ('%s','%s','%s','%s',%s,'%s','%s',%s,'%s',%s,'%s','%s','%s','%s',%s,'%s','%s')" \
            % (
            expstarttimecode,
            args.minotourusername,
            flowcellid,
            dbname,
            1,
            comment,
            flowcellowner,
            runnumber,
            big_reference,
            big_reflength,
            basecalleralg,
            version,
            minup_version,
            process,
            1,
            wdir,
            ip,
            )
        #print sql
        if args.verbose == "high":
            print sql
            debug()
        #if args.verbose == "high":
        print '... Database created.'
        sys.stdout.flush()
        db.escape_string(sql)
        args, db, cursor = cursor_execute(args, db, cursor, sql)
        db.commit()
        runindex = cursor.lastrowid
        dbcheckhash['runindex'][dbname] = runindex
        #print "Runindex:",runindex
        # # add us">> ", view_users
        if args.verbose == "high":
            print "Adding users..."
            sys.stdout.flush()
        # Grant view access: the uploader plus any -view_users names.
        view_users = [username]
        if args.view_users:
            extra_names = args.view_users.split(',')
            # view_users = args.view_users + extra_names # MS
            view_users = view_users + extra_names  # MS
        for user_name in view_users:
            sql = \
                "SELECT user_id FROM Gru.users WHERE user_name =\'%s\'" \
                % user_name
            # print sql
            args, db, cursor = cursor_execute(args, db, cursor, sql)
            if 0 < cursor.rowcount:
                sql = \
                    'INSERT INTO Gru.userrun (user_id, runindex) VALUES ((SELECT user_id FROM Gru.users WHERE user_name =\'%s\') , (SELECT runindex FROM Gru.minIONruns WHERE runname = "%s") )' \
                    % (user_name, dbname)
                if args.verbose == "high":
                    print sql
                    debug()
                # print sql
                args, db, cursor = cursor_execute(args, db, cursor, sql)
                db.commit()
            else:
                # Unknown viewer username is fatal.
                print 'The MinoTour username "%s" does not exist. Please create it or remove it from the input arguments' \
                    % user_name
                sys.stdout.flush()
                sys.exit()
        # # Create comment table if it doesn't exist
        create_comment_table_if_not_exists('Gru.comments', args, db,
                cursor)
        # # Add first comment to table
        start_time = time.strftime('%Y-%m-%d %H:%M:%S')
        comment_string = 'minUp version %s started' % minup_version
        mysql_load_from_hashes(
            args, db, cursor, 'Gru.comments', {
            'runindex': runindex,
            'runname': dbname,
            'user_name': args.minotourusername,
            'comment': comment_string,
            'name': args.dbusername,
            'date': start_time,
            })
        # ---------------------------------------------------------------------------
        # --------- make log file and initinal entry
        with open(dbcheckhash['logfile'][dbname], 'w') as logfilehandle:
            logfilehandle.write('minup started at:\t%s%s' % (start_time,
                                os.linesep))
            logfilehandle.write('minup version:\t%s%s' % (minup_version,
                                os.linesep))
            logfilehandle.write('options:' + os.linesep)
            logfilehandle.write('minotour db host:\t%s%s' % (args.dbhost,
                                os.linesep))
            logfilehandle.write('minotour db user:\t%s%s'
                                % (args.dbusername, os.linesep))
            logfilehandle.write('minotour username:\t%s%s'
                                % (args.minotourusername, os.linesep))
            logfilehandle.write('minotour viewer usernames:\t%s%s'
                                % (view_users, os.linesep))
            logfilehandle.write('flowcell owner:\t%s%s' % (flowcellowner,
                                os.linesep))
            logfilehandle.write('run number:\t%s%s' % (args.run_num,
                                os.linesep))
            logfilehandle.write('watch directory:\t%s%s'
                                % (args.watchdir, os.linesep))
            '''
            # DEPRECATIN TELEM MS 11.10.16
            logfilehandle.write('upload telemetry:\t%s%s' % (args.telem, os.linesep))
            '''
            logfilehandle.write('Reference Sequences:' + os.linesep)
            if dbname in ref_fasta_hash:
                for refname in ref_fasta_hash[dbname]['seq_len'].iterkeys():
                    logfilehandle.write(
                        'Fasta:\t%s\tlength:\t%d%s'
                        % (ref_fasta_hash[dbname]['seq_file'][refname],
                           ref_fasta_hash[dbname]['seq_len'][refname],
                           os.linesep))
            else:
                logfilehandle.write('No reference sequence set'
                                    + os.linesep)
            logfilehandle.write('comment:\t%s%s' % (comment, os.linesep))
            logfilehandle.write('Errors:' + os.linesep)
            # Redundant inside 'with', but harmless.
            logfilehandle.close()
        #startMincontrol(args, dbname, cursor, dbcheckhash,\
        #    minup_version, oper)
        # # connection_pool for this db
        connection_pool[dbname] = list()
        '''
        # DEPRECATIN LAST TELEM MS 11.10.16
        if args.last_align is True \
            or args.bwa_align is True \
            or args.telem is True:
        '''
        if args.bwa_align is True:
            # Three dedicated connections for the bwa alignment workers.
            try:
                db_a = MySQLdb.connect(host=args.dbhost,
                        user=args.dbusername, passwd=args.dbpass,
                        port=args.dbport, db=dbname)
                connection_pool[dbname].append(db_a)
                db_b = MySQLdb.connect(host=args.dbhost,
                        user=args.dbusername, passwd=args.dbpass,
                        port=args.dbport, db=dbname)
                connection_pool[dbname].append(db_b)
                db_c = MySQLdb.connect(host=args.dbhost,
                        user=args.dbusername, passwd=args.dbpass,
                        port=args.dbport, db=dbname)
                connection_pool[dbname].append(db_c)
            except Exception, err:
                err_string = 'Error bwa_align: %s ' % err
                print >> sys.stderr, \
                    "Can't setup MySQL connection pool: %s" % err
                sys.stdout.flush()
                with open(dbcheckhash['logfile'][dbname], 'a') as \
                        logfilehandle:
                    logfilehandle.write(err_string + os.linesep)
                    logfilehandle.close()
                sys.stdout.flush()
                sys.exit()
        # --------- this bit last to set the active database in this hash
        if dbcheckhash['dbname']:
            for e in dbcheckhash['dbname'].keys():
                dbcheckhash['dbname'][e] = False
        dbcheckhash['dbname'][dbname] = True