def loadResults_Mitocheck(self, exp_list):
    '''
    Load phenotype results on a per-experiment basis and save one normalised
    phenotype histogram per experiment.

    This will be interesting to look at distances between experiments based on
    phenotypes, vs distances based on trajectory types.

    For each (plate, well) pair the pickled tuple (pheno_seq_list, mask) is read from
    <outputFolder>/<plate>/<outputFile formatted with plate[:10] and well>. Sequences
    whose index is in mask are skipped; the others are counted over 17 classes, summed,
    stripped of the last two classes and divided by their total.

    The tuple (result, missed) is then pickled to
    <outputFolder>/<outputFile formatted with "ALL" and "hit_exp">, where result stacks
    one histogram per successfully loaded experiment (None if there is none) and missed
    lists the positions in exp_list of the experiments that could not be loaded.

    :param exp_list: sequence of (plate, well) pairs. If its first entry does not have
        length 2, the list is first passed through strToTuple together with the content
        of the output folder (presumably to turn it into such pairs - confirm against
        strToTuple).
    :return: None; the result is only written to disk.
    '''
    import sys  # local import: only needed for the inline progress counter

    # An empty list has no first entry to inspect and simply yields an empty result
    if len(exp_list) > 0 and len(exp_list[0]) != 2:
        exp_list = strToTuple(exp_list, os.listdir(self.settings.outputFolder))

    result = None
    missed = []
    for i, (pl, w) in enumerate(exp_list):
        # Progress counter printed on a single line
        sys.stdout.write("{} ".format(i))
        filename = os.path.join(self.settings.outputFolder, pl,
                                self.settings.outputFile.format(pl[:10], w))
        try:
            # Binary mode is what pickle expects (mandatory on Python 3 and for
            # protocols >= 1); the context manager closes the file even if loading fails
            with open(filename, 'rb') as f:
                pheno_seq_list, mask = pickle.load(f)
        except Exception:
            # Best effort: any unreadable experiment is recorded and skipped, but
            # KeyboardInterrupt/SystemExit are no longer swallowed
            print("Loading error for  {} {}".format(pl, w))
            missed.append(i)
            continue

        counts = np.sum(
            np.array([np.bincount(sequence, minlength=17)
                      for j, sequence in enumerate(pheno_seq_list) if j not in mask]),
            0)[:-2]
        # 15 and 16 are respectively out of focus and artefact objects. We don't want them
        frequencies = counts / float(np.sum(counts))
        result = frequencies if result is None else np.vstack((result, frequencies))

    print("Saving")
    summary_file = os.path.join(self.settings.outputFolder,
                                self.settings.outputFile.format("ALL", "hit_exp"))
    with open(summary_file, 'wb') as f:
        pickle.dump((result, missed), f)
    return
def scriptCommand(exp_list, baseName='comp_track', command="tracking/trajPack/cell_cycle.py", jobSize=10,
                  h5_result_dir="/share/data20T/mitocheck/Alice/results", max_nodes=500, **kwargs):
    '''
    Write the shell scripts of a job array that runs `command` on every entry of
    exp_list, and print the qsub line that submits the array.

    Entries are grouped by chunks of jobSize; chunk number k (starting at 1) goes to
    <scriptFolder>/<baseName><k>.sh. A launcher <scriptFolder>/<baseName>.sh that calls
    the numbered script selected by the array-index environment variable is written
    last. All scripts are made executable.

    :param exp_list: either integers (each passed to the command as `-i <value>`) or
        (plate, well) pairs (passed as `-p <plate> -w <well>`). If the first entry is
        neither an int nor of length 2, the list is first passed through strToTuple
        together with the content of h5_result_dir.
    :param baseName: common prefix of the generated script names.
    :param command: path of the Python script to run.
    :param jobSize: number of entries handled by one numbered script.
    :param h5_result_dir: folder listed for strToTuple when entries need converting.
    :param max_nodes: value of the `-tc` option in the printed qsub line.
    :param kwargs: each pair is appended to the command as ` -<keyword> <value>`.
    :return: 1
    :raises IndexError: if exp_list is empty.
    '''
    def _write_executable(path, content):
        # Write content to path, then add the execute bit for everybody (chmod a+x);
        # without this, the cluster node cannot call the script
        with open(path, "w") as script_file:
            script_file.write(content)
        os.chmod(path, os.stat(path).st_mode | 0o111)

    perExperiment = not isinstance(exp_list[0], int)
    if perExperiment and len(exp_list[0]) != 2:
        exp_list = strToTuple(exp_list, os.listdir(h5_result_dir))

    # Ceiling division: an exact multiple of jobSize must not produce an extra, empty
    # script. At least one script is kept so that the array range 1-N stays valid.
    fileNumber = max(1, (len(exp_list) + jobSize - 1) // jobSize)

    head = "#!/bin/sh\ncd %s" % progFolder
    for keyword, value in kwargs.items():
        command += " -{} {}".format(keyword, value)

    for k in range(fileNumber):
        calls = []
        for exp in exp_list[jobSize * k:jobSize * (k + 1)]:
            if perExperiment:
                pl, w = exp
                calls.append("\npython %s -p %s -w %s" % (command, pl, w))
            else:
                calls.append("\npython %s -i %i" % (command, exp))
        # The script file is called <baseName><x>.sh, where x is 1, 2, 3, 4, ... and
        # corresponds to the job index
        script_name = os.path.join(scriptFolder, '%s%i.sh' % (baseName, k + 1))
        _write_executable(script_name, head + "".join(calls))

    # Write the main script
    array_script_name = '%s.sh' % os.path.join(scriptFolder, baseName)
    main_content = "#!/bin/sh\n%s\n#$ -o %s\n#$ -e %s\n%s$%s.sh\n" % (
        path_command, pbsOutDir, pbsErrDir, os.path.join(scriptFolder, baseName), pbsArrayEnvVar)
    # Written and closed before the permissions are changed
    _write_executable(array_script_name, main_content)

    sub_cmd = 'qsub -tc %i -t 1-%i %s' % (max_nodes, fileNumber, array_script_name)
    print(sub_cmd)
    return 1
def globalSummaryScript(baseName, siRNAFile, n_clusters_min, n_clusters_max, div_name, lambda_, weights, bins_type,
                        bin_size, cost_type, batch_size, n_init, init, ddim, iter_=0):
    '''
    Write the job-array scripts for the summary step and for the clustering step, and
    print the two qsub lines that submit them.

    A. One script per experiment of every siRNA found in the pickled list siRNAFile;
       siRNAs missing from the siRNA-experiment dictionary are reported and skipped.
    B. One script per control experiment, for a random selection whose size is 20% of
       the number of experiments of step A.
    C. The list of all experiments (A + B) is pickled to exp_Simpson_<timestamp>.pkl in
       the current directory, and one clustering script is written per number of
       clusters in range(n_clusters_min, n_clusters_max).

    Numbered scripts are <scriptFolder>/<name><index>.sh with indices starting at 1, so
    that they match the `-t 1-N` range of the printed qsub lines; each array has a
    launcher <scriptFolder>/<name>.sh. All scripts are made executable.

    :param baseName: prefix of the script names; iter_, div_name[:5], weights,
        bins_type, bin_size and cost_type are appended to it.
    :param siRNAFile: path of a pickle file containing the siRNA list.
    :param n_clusters_min: first number of clusters to test (included).
    :param n_clusters_max: last number of clusters to test (excluded).
    :param div_name, weights, bins_type, bin_size, cost_type, batch_size, init, ddim, iter_:
        forwarded to plateWellSummaryScript and written on the clustering command line.
    :param lambda_, n_init: only forwarded to plateWellSummaryScript.
    :return: 1
    '''
    def _write_executable(path, content):
        # Write content to path, then add the execute bit for everybody (chmod a+x);
        # without this, the cluster node cannot call the script
        with open(path, "w") as script_file:
            script_file.write(content)
        os.chmod(path, os.stat(path).st_mode | 0o111)

    def _write_array_script(name):
        # Write the launcher that dispatches to <name><array index>.sh; return its path
        array_script_name = '%s.sh' % os.path.join(scriptFolder, name)
        main_content = "#!/bin/sh\n%s\n#$ -o %s\n#$ -e %s\n%s$%s.sh\n" % (
            path_command, pbsOutDir, pbsErrDir, os.path.join(scriptFolder, name), pbsArrayEnvVar)
        _write_executable(array_script_name, main_content)
        return array_script_name

    def _write_summary_script(plate, well, index):
        # Write the summary script number `index` for one (plate, well) experiment
        cmd = plateWellSummaryScript(plate, well, div_name, lambda_, weights, bins_type, bin_size, cost_type,
                                     batch_size, n_init, init, ddim, iter_)
        _write_executable(os.path.join(scriptFolder, baseName + '{}.sh'.format(index)), head + cmd)

    # Binary mode is what pickle expects (mandatory on Python 3 and for protocols >= 1)
    with open(siRNAFile, 'rb') as f:
        siRNAList = pickle.load(f)
    siExpDict = expSi(qc=quality_control_file, sens=0)

    jobCount = 0
    total_expList = []
    head = "#!/bin/sh\ncd %s" % progFolder
    baseName = baseName + '{}{}_w{}_{}_{}_{}'.format(iter_, div_name[:5], weights, bins_type, bin_size, cost_type)

    # A. DEALING WITH EXPERIMENTS
    for siRNA in siRNAList:
        try:
            expList = siExpDict[siRNA]
        except KeyError:
            print("siRNA not in siRNA-experiment dictionary")
            continue
        expList = strToTuple(expList, os.listdir(data_folder))
        total_expList.extend(expList)
        for plate, well in expList:
            jobCount += 1
            _write_summary_script(plate, well, jobCount)

    # B. DEALING WITH CONTROLS
    ctrlExp = countingDone(appendingControl(total_expList))
    np.random.shuffle(ctrlExp)
    ctrlExp = ctrlExp[:int(0.2 * len(total_expList))]
    for plate, well in ctrlExp:
        jobCount += 1
        _write_summary_script(plate, well, jobCount)

    # Write the main script of the summary array
    array_script_name = _write_array_script(baseName)
    sub_cmd = 'qsub -t 1-%i %s' % (jobCount, array_script_name)
    print(sub_cmd)

    # C. DOING EXPERIMENT CLUSTERING STEP
    expFilename = 'exp_Simpson_{}.pkl'.format(int(time.time()))
    total_expList.extend(ctrlExp)
    with open(expFilename, 'wb') as f:
        pickle.dump(total_expList, f)

    baseName = baseName + '_clustering'
    # Scripts are numbered from 1: the array below is submitted with `-t 1-N`, so a
    # script numbered 0 would never run and task N would have no script to call
    for task_id, n_clusters in enumerate(range(n_clusters_min, n_clusters_max), 1):
        cmd = ("\npython tracking/histograms/summarization_clustering.py -a clustering --experimentFile %s -k %i "
               "--ddimensional %i --bins_type %s --cost_type %s --bin_size %i --div_name %s -w %i --init %s "
               "--batch_size %i --iter %i\n")
        cmd %= (expFilename, n_clusters, ddim, bins_type, cost_type, bin_size, div_name, weights, init,
                batch_size, iter_)
        _write_executable(os.path.join(scriptFolder, baseName + '{}.sh'.format(task_id)), head + cmd)

    # Write the main script of the clustering array
    array_script_name = _write_array_script(baseName)
    # NOTE(review): -hold_jid normally takes a job list; the printed line presumably has
    # to be completed by hand with the id of the summary array job - confirm
    sub_cmd = 'qsub -hold_jid -t 1-%i %s' % (n_clusters_max - n_clusters_min, array_script_name)
    print(sub_cmd)

    # D. GOING BACK TO EXPERIMENTS AND TESTING IF DIFFERENT FROM CONTROLS
    return 1
def hitFinderScript(baseName, siRNAFile, testCtrl=False, iter_=0):
    '''
    Write the job-array scripts for the hit finder step and print the qsub line that
    submits the array.

    With testCtrl False, one script is written per siRNA of the pickled list siRNAFile,
    calling summarization_clustering.py with `--siRNA <siRNA>`. With testCtrl True, the
    experiments of these siRNAs are looked up, and one script is written per distinct
    plate (in sorted order), calling it with `--testCtrl <plate>`.

    Numbered scripts are <scriptFolder>/<name><index>.sh with indices starting at 1 and
    the launcher is <scriptFolder>/<name>.sh, where <name> is baseName followed by iter_
    (and by 'CTRL' if testCtrl). All scripts are made executable.

    :param baseName: prefix of the script names.
    :param siRNAFile: path of a pickle file containing the siRNA list.
    :param testCtrl: if True, write per-plate control scripts instead of per-siRNA ones.
    :param iter_: value of the `--iter` option, also appended to baseName.
    :return: None
    :raises KeyError: with testCtrl True, if a siRNA is not in the dictionary returned
        by expSi.
    '''
    def _write_executable(path, content):
        # Write content to path, then add the execute bit for everybody (chmod a+x);
        # without this, the cluster node cannot call the script
        with open(path, "w") as script_file:
            script_file.write(content)
        os.chmod(path, os.stat(path).st_mode | 0o111)

    # Binary mode is what pickle expects (mandatory on Python 3 and for protocols >= 1);
    # the context manager closes the file even if loading fails
    with open(siRNAFile, 'rb') as f:
        siRNAList = pickle.load(f)

    head = "#!/bin/sh\ncd %s" % progFolder
    baseName += '{}'.format(iter_)

    if testCtrl:
        baseName = baseName + 'CTRL'
        expList = []
        yqualDict = expSi(quality_control_file, sens=0)
        for siRNA in siRNAList:
            expList.extend(strToTuple(yqualDict[siRNA], os.listdir(data_folder)))
        # Distinct plates; sorted so that the script numbering is reproducible
        targets = sorted({exp[0] for exp in expList})
        template = ("\npython tracking/histograms/summarization_clustering.py -a hitFinder --verbose 0 "
                    "--testCtrl %s --iter %i\n")
    else:
        targets = siRNAList
        template = ("\npython tracking/histograms/summarization_clustering.py -a hitFinder --siRNA %s "
                    "--verbose 0 --iter %i\n")

    jobCount = 0
    for jobCount, target in enumerate(targets, 1):
        # The script file is called <baseName><x>.sh, where x is 1, 2, 3, 4, ... and
        # corresponds to the job index
        script_name = os.path.join(scriptFolder, baseName + '{}.sh'.format(jobCount))
        _write_executable(script_name, head + template % (target, iter_))

    # Write the main script
    array_script_name = '%s.sh' % os.path.join(scriptFolder, baseName)
    main_content = "#!/bin/sh\n%s\n#$ -o %s\n#$ -e %s\n%s$%s.sh\n" % (
        path_command, pbsOutDir, pbsErrDir, os.path.join(scriptFolder, baseName), pbsArrayEnvVar)
    _write_executable(array_script_name, main_content)

    sub_cmd = 'qsub -t 1-%i %s' % (jobCount, array_script_name)
    print(sub_cmd)
    return