def mito_PHENOSCORE(file_='MITO_pheno_scores.pkl', 
                    folder='/media/lalil0u/New/projects/drug_screen/results/'):
    f=open('../data/mitocheck_exp_hitlist_perPheno.pkl')
    hitlistperpheno=pickle.load(f)
    f.close()
    
    f=open(os.path.join(folder, file_), 'r')
    scores, who=pickle.load(f); f.close()
    norm = mpl.colors.Normalize(-0.2,0.6)
    yqualdict=expSi('../data/mapping_2014/qc_export.txt')
    dictSiEntrez=siEntrez('../data/mapping_2014/mitocheck_siRNAs_target_genes_Ens75.txt')
    
    bigGenesL=np.array([dictSiEntrez[yqualdict[e]] for e in who])
    
    for pheno in hitlistperpheno:
        expL=hitlistperpheno[pheno]
        genesL=np.array([dictSiEntrez[yqualdict[e]] for e in filter(lambda x: yqualdict[x] in dictSiEntrez, expL)])
        distinct_genes = sorted(list(set(genesL)))
        ind=np.hstack((np.where(bigGenesL==gene)[0] for gene in distinct_genes))
        print pheno, 
        
        currscores=scores[ind]
        print currscores.shape
        f=p.figure()
        ax=f.add_subplot(111)
        ax.matshow(currscores.T, cmap=mpl.cm.YlOrRd, norm=norm, aspect='auto')
        ax.set_title(pheno)
        ax.set_yticks(range(15))
        ax.set_yticklabels(CLASSES)
        
#        p.show()
        p.savefig(os.path.join(folder, 'pheno_score_MITO_{}.png'.format(pheno[:10])))
    return
예제 #2
0
def condition_inference(M, who_hits, exposure_hits, who_Mitocheck, num_permutations, threshold, random_result, 
                         taking_siRNAs=False):
    '''
    - M: distance matrix of size (hits, mitocheck)
    - taking_siRNAs: indicates if you want to consider different values of the same siRNAs independently (False) or as replicates of the same condition
'''
    r={}
    yqualdict=expSi('../data/mapping_2014/qc_export.txt')
    yqualdict.update(expSi('../data/mapping_2014/qc_validation_exp.txt', primary_screen=False))
    dictSiEntrez=siEntrez('../data/mapping_2014/mitocheck_siRNAs_target_genes_Ens75.txt')
    
    siRNAs=[yqualdict[e] for e in who_Mitocheck]
    genes=[dictSiEntrez[e] for e in siRNAs]
    
    count=Counter(exposure_hits)
    #This way I look at exposures that are hits at least 50% of the times/plates
    for el in filter(lambda x: count[x]/float(PASSED_QC_COND[x])>0.5, count):
        print el
        where_=np.where(exposure_hits==el)[0]
        
        batch_names = who_hits[where_]
        
        curr_dist=np.hstack((M[j] for j in where_))
        curr_who=[(batch_names[0], '') for k in range(M.shape[1])]
        
        curr_conditions=list(who_Mitocheck) if not taking_siRNAs else list(siRNAs)
        for name in batch_names[1:]:
            curr_who.extend([(name, '') for k in range(M.shape[1])])
            if not taking_siRNAs:
                curr_conditions.extend(who_Mitocheck)
            else:
                curr_conditions.extend(siRNAs)
        print curr_dist.shape
        r[el], random_result=computeRPpvalues(curr_dist, np.array(curr_who), conditions=np.array(curr_conditions), technical_replicates_key=np.median, 
                     num_permutations=num_permutations, reverse=False, 
                     batch_names=batch_names, random_result=random_result,
                     signed=False)
        r[el]=sorted(r[el], key=itemgetter(2))
        pval=np.array(np.array(r[el])[:,-1], dtype=float)
        if not taking_siRNAs:
            currG=[dictSiEntrez[yqualdict[e]] for e in np.array(r[el])[np.where(pval<=threshold)][:,0]]
        else:
            currG=[dictSiEntrez[e] for e in np.array(r[el])[np.where(pval<=threshold)][:,0]]
        print sorted(Counter(currG).keys())
        
        
    return r
예제 #3
0
    def MITO_usable(self, yqualDict=None, dictSiEntrez=None):
        '''
        Tell whether the experiment (self.plate, self.well) can be used: it must be
        in the quality control dictionary and, unless it is a primary screen
        control, its siRNA must be in the siRNA to gene mapping.

        - yqualDict: dictionary {experiment id: siRNA}. If None it is loaded from
            the settings (validation file if 'LTValidMitosis' is in the plate name,
            primary screen file otherwise)
        - dictSiEntrez: dictionary whose keys are the siRNAs with a target gene.
            If None it is loaded from self.settings.mitocheck_mapping_file

        Returns True if the experiment is usable. Otherwise the reason is written
        on stderr and False is returned.
        '''
        is_validation='LTValidMitosis' in self.plate
        if dictSiEntrez is None:
            dictSiEntrez=siEntrez(self.settings.mitocheck_mapping_file)
        if yqualDict is None:
            if is_validation:
                yqualDict=expSi(self.settings.valid_qc_file, primary_screen=False)
            else:
                yqualDict=expSi(self.settings.mitocheck_qc_file)

        # Validation plate names are cut at '--', primary screen ones at 9 characters
        plate_id=self.plate.split('--')[0] if is_validation else self.plate[:9]
        exp_id='{}--{:>03}'.format(plate_id, self.well)

    #i. checking if quality control passed
        # Done before reading yqualDict[exp_id], so that a failed experiment is
        # reported instead of ending in a KeyError
        if exp_id not in yqualDict:
            sys.stderr.write("Quality control not passed {} {} \n".format(self.plate, self.well))
            return False

    #ii.checking if siRNA corresponds to a single target in the current state of knowledge
        if is_validation:
            no_target=yqualDict[exp_id] not in dictSiEntrez
        else:
            # Primary screen controls are accepted without a target gene
            no_target=not is_ctrl_mitocheck((self.plate[:9], '{:>05}'.format(self.well))) and yqualDict[exp_id] not in dictSiEntrez
        if no_target:
            sys.stderr.write( "SiRNA having no target or multiple target {} {}\n".format(self.plate, self.well))
            return False
        return True
예제 #4
0
 def load_pheno_seq_results_MITO(self,exp_list, time_aggregated=False):
     '''
     Here we're loading results from per frame files (pheno_count) on a per experiment basis. This will be interesting to look at distances between experiments
     based on phenotypes, aggregated on time
     '''
     missed=[]
     yqualDict=expSi(self.settings.mitocheck_qc_file)
     dictSiEntrez=siEntrez(self.settings.mitocheck_mapping_file)
     result = None; i=0; who=[]
     for pl,w in exp_list:
         print i,
         
         try:
             f=open(os.path.join(self.settings.outputFolder,pl, self.settings.outputFile.format(pl[:9], w)), 'r')
             pheno_seq_per_frame= pickle.load(f)
             f.close()
         except:
             print "Loading error for ", pl, w
             self.plate=pl; self.well=w
             if self.MITO_usable():
                 missed.append((pl,w))
             continue
         else:
         #15 and 16 are respectively out of focus and artefact objects. We don't want them
             if time_aggregated:
                 pheno_seq_per_frame=np.sum(pheno_seq_per_frame, 0)
                 pheno_seq_list=pheno_seq_per_frame/float(np.sum(pheno_seq_per_frame))
                 result = np.vstack((result, pheno_seq_list)) if result is not None else pheno_seq_list
             else:
                 pheno_seq_per_frame=np.vstack((np.sum(pheno_seq_per_frame[self.settings.time_agg*k:self.settings.time_agg*(k+1)],0) 
                                                for k in range(pheno_seq_per_frame.shape[0]/self.settings.time_agg)))
                 if result is not None:
                     shape_=min(result.shape[1], pheno_seq_per_frame.shape[0])
                     result = np.vstack((result[:,:shape_], pheno_seq_per_frame[np.newaxis][:,:shape_]))
                 else:
                     result= pheno_seq_per_frame[np.newaxis]
                 
     #ATTENTION gros bug avec LTValidMItosis si on limite a la taille et non pas au split de -- le nom de la plaque...
             who.append('{}--{:>03}'.format(pl.split('--')[0], w))
         finally:
             i+=1
     
     _name='time' if not time_aggregated else ''
             
     print "Saving to ",self.settings.outputFile.format("ALL", "MITO_{}".format(_name))
     
     f=open(os.path.join(self.settings.outputFolder,self.settings.outputFile.format("ALL", "MITO_{}".format(_name))), 'w')
     pickle.dump((result, who),f); f.close()
     return missed
예제 #5
0
def distributionLongueurs(folder, exp_list, qc):
    R=[]
    yqualDict=expSi(qc)

    print "loading from experiments list"
    i=0
    for pl, w in exp_list:
        print i,
        i+=1
        result = []
        try:
            f=open(os.path.join(folder, pl, 'hist_tabFeatures_{}.pkl'.format(w)), 'r')
            arr, _, _= pickle.load(f)
            f.close()
        except IOError:
            print "Pas de fichier {}".format(os.path.join(pl, 'hist_tabFeatures_{}.pkl'.format(w)))
        except EOFError:
            print "Probleme EOFError d'ouverture du fichier {}".format(os.path.join(pl, 'hist_tabFeatures_{}.pkl'.format(w)))
            pdb.set_trace()
        else:
    #pdb.set_trace()
            if arr==None:
                print "Array {} is None".format(os.path.join(pl, 'hist_tabFeatures_{}.pkl'.format(w)))
                pdb.set_trace()
            elif pl[:9]+'--'+w[2:5] not in yqualDict:
                print "Quality control not passed", pl[:9], w[2:5]   
    
            else:
                try:
                    #result.extend([len(coord[k][0]) for k in range(len(coord))])
                    result.append(arr.shape[0])
                except (TypeError, EOFError, ValueError, AttributeError):
                    print "Probleme avec le fichier {}".format(os.path.join(pl, 'hist_tabFeatures_{}.pkl'.format(w)))
                    pdb.set_trace()
        try:
            f=open(os.path.join(folder, pl, 'traj_noF_densities_w{}.hdf5.pkl'.format(w)))
            d=pickle.load(f); f.close()
        except:
            print 'Pbl trajectory file'
            pdb.set_trace()
        else:
            result.append(d['movie_length'])
            result.append(len(d["tracklets dictionary"][pl][w].lstTraj))
            result.append(np.mean([len(el.lstPoints) for el in d['tracklets dictionary'][pl][w].lstTraj]))
        R.append(result)
                    
    return R
def from_geneL_to_phenoHit(geneL,hitFile='../data/mitocheck_exp_hitlist_perPheno.pkl'):
    '''
    Turn the per-phenotype hit list into a dictionary {gene: sorted list of the
    phenotypes for which at least one experiment targeting this gene is a hit}.

    - geneL: not used in this function
    - hitFile: pickle containing {phenotype: list of experiments}

    Experiments whose siRNA is not in the siRNA to gene mapping are ignored.
    Returns a defaultdict(list).
    '''
    handle=open(hitFile)
    phenoToExps=pickle.load(handle)
    handle.close()
    expToSi=expSi('/media/lalil0u/New/workspace2/Xb_screen/data/mapping_2014/qc_export.txt')
    siToGene=siEntrez('/media/lalil0u/New/workspace2/Xb_screen/data/mapping_2014/mitocheck_siRNAs_target_genes_Ens75.txt')

    # First pass: gather the phenotypes of each gene without duplicates
    phenosPerGene=defaultdict(set)
    for phenotype in phenoToExps:
        for experiment in phenoToExps[phenotype]:
            if expToSi[experiment] in siToGene:
                phenosPerGene[siToGene[expToSi[experiment]]].add(phenotype)

    # Second pass: sorted lists, in the container type that is returned
    res=defaultdict(list)
    for gene in phenosPerGene:
        res[gene]=sorted(phenosPerGene[gene])

    return res
예제 #7
0
def evaluate_inference(result, distance_name=None, hitlist_file='/media/lalil0u/New/workspace2/Xb_screen/data/mitocheck_exp_hitlist_perPheno.pkl', 
                       print_=False,folder='/media/lalil0u/New/projects/drug_screen/results/',
                       threshold=0.0001):
    '''
    For each condition of result, look for the rank of its known target genes, select a
    list of genes with threshold and count what the hit list contains for these genes.

    - result: dictionary {condition: list of results}. The first field of a result is used as
        a key of the siRNA to gene mapping and the last one as its value (presumably a p-value, confirm)
    - distance_name: only used in the name of the output file GO_<distance_name>.txt
    - hitlist_file: pickle, indexed below by gene
    - print_: if True, prints the condition and the rank of the known targets that are found
    - folder: where the gene list file is written
    - threshold: if it is an int the threshold first genes are selected, otherwise the genes
        whose value is equal to threshold

    Returns a dictionary {condition: {'genes': ranks of the known targets, 'gene_list': these
    targets, 'type': Counter of the hit list content of the selected genes}}
    The selected genes, translated with EnsemblEntrezTrad, are written by multipleGeneListsToFile.
    '''
    # NOTE: pickle executes arbitrary code on load, only use it on trusted files
    f=open(hitlist_file, 'r')
    hitlist=pickle.load(f); f.close()
    # NOTE(review): yqualdict is loaded but not read in this function
    yqualdict=expSi('../data/mapping_2014/qc_export.txt')
    dictSiEntrez=siEntrez('../data/mapping_2014/mitocheck_siRNAs_target_genes_Ens75.txt')
    trad=EnsemblEntrezTrad('../data/mapping_2014/mitocheck_siRNAs_target_genes_Ens75.txt')
    
    to_plot_info=defaultdict(dict)
    to_GO_info=defaultdict(list)
    
    for cond in sorted(result):
        if print_:
            print '-----------', cond
        # Array of (gene, value) in the order of result[cond]: the row index is used as the rank
        l=np.array([(dictSiEntrez[el[0]], el[-1]) for el in result[cond]])
        to_plot_info[cond]=defaultdict(list)
        # The known targets are looked up with the part of the condition name before '--'
        for el in sorted(KNOWN_TARGETS[cond.split('--')[0]]):
            if np.where(l[:,0]==el)[0].shape[0]>0:
                # First position of the gene in the list
                rank_=np.where(l[:,0]==el)[0][0]
                to_plot_info[cond]['genes'].append(rank_)
                to_plot_info[cond]['gene_list'].append(el)
                if print_:
                    print el, l[np.where(l[:,0]==el)], rank_
        
        if type(threshold)==int:
            gene_lim=l[:threshold][:,0]
        else:
            # NOTE(review): strict equality with threshold, whereas condition_inference uses <= - confirm it is wanted
            lim=np.where(np.array(l[:,1], dtype=float)==threshold)[0]
            gene_lim=l[lim][:,0]
        to_GO_info[cond]=[trad[el] for el in gene_lim]
        res=[]
        for gene in gene_lim:
            # NOTE(review): hitlist is indexed by gene, whereas other functions read the default
            # hitlist_file as {phenotype: experiments} - check which file is expected here
            res.extend(hitlist[gene])
    #STEP2 here : we add writing gene list files
            
        to_plot_info[cond]['type']= Counter(res)
    
    multipleGeneListsToFile([to_GO_info[el] for el in to_GO_info], [el for el in to_GO_info], name=os.path.join(folder, 'GO_{}.txt'.format(distance_name)))
    
    return to_plot_info
예제 #8
0
def loadPredictions(loadingFolder = '../resultData/thrivisions/predictions', outputFilename = "thripred_{}_{}.pkl", sh=False, load=False,write=False,
                    mitocheck = '/cbio/donnees/aschoenauer/workspace2/Xb_screen/data/mitocheck_siRNAs_target_genes_Ens75.txt',
                    qc = '/cbio/donnees/aschoenauer/workspace2/Xb_screen/data/qc_export.txt',
                    ensembl="../data/mapping_2014/mitocheck_siRNAs_target_genes_Ens75.txt",
                    threshold=0.05, qval=None
                    ):
    '''
    If load is True, gather all the prediction files of loadingFolder whose name contains
    'thripred' and pickle the tuple (nb_objects_init, nb_objects_final, percent_thrivision,
    who, genes, siRNA) in loadingFolder/all_predictions.pkl. Nothing is done if load is False.

    - loadingFolder: folder of the prediction files, also where the output is written
    - load: the work is only done if True
    - mitocheck: siRNA to gene mapping file, given to siEntrez
    - qc: quality control file, given to expSi
    - outputFilename, sh, write, ensembl, threshold, qval: not used in the code visible here

    Plate, well and experiment id are read at fixed positions of the file name.
    Returns None.
    '''
    if load :
        yqualDict=expSi(qc)
        dictSiEntrez=siEntrez(mitocheck, yqualDict.values())
        
        results = filter(lambda x: 'thripred' in x, os.listdir(loadingFolder))
        who=[]; siRNA=[]; genes=[]
        nb_objects_init=[];nb_objects_final=[];
        percent_thrivision=[]
        for result in results:
            try:
                # NOTE: pickle executes arbitrary code on load, only use it on trusted files
                with open(os.path.join(loadingFolder, result)) as f:
                    r = pickle.load(f)
            # Tuple needed: "except OSError, IOError" only catches OSError and stores it under the name IOError
            except (OSError, IOError):
                pdb.set_trace()
            else:
                percent, nb_ob_init, nb_obj_final=r[:3]
                who.append((result[9:18], result[19:27]))
                nb_objects_init.append(nb_ob_init)
                nb_objects_final.append(nb_obj_final)
                # Despite its name this list stores percent times the initial number of objects
                percent_thrivision.append(percent*nb_ob_init)
                siCourant = yqualDict[result[9:18]+'--'+result[21:24]]
                siRNA.append(siCourant)
                try:
                    genes.append(dictSiEntrez[siCourant])
                except KeyError:
                    # siRNAs listed here are recorded as controls. Other unmapped siRNAs
                    # open the debugger and are then recorded as controls too
                    if siCourant not in ["scramble", '103860', '251283']:
                        pdb.set_trace()
                    genes.append('ctrl')
        with open(os.path.join(loadingFolder, "all_predictions.pkl"), 'w') as f:
            pickle.dump((nb_objects_init, nb_objects_final, percent_thrivision, who, genes, siRNA),f)
        return
def selecting_right_Mito_exp(folder='/media/lalil0u/New/projects/drug_screen/results/'):
    '''
    Choose one siRNA per gene among the Mitocheck hit experiments (primary screen) and the
    validation experiments: among the siRNAs of the gene that have at least two experiments,
    the one whose median first score is the smallest.

    - folder: folder containing MITO_pheno_scores_NOTVAL.pkl and MITO_pheno_scores_VAL.pkl,
        each one a pickle whose element 0 holds the scores and element 1 the experiment ids

    Returns (genesL, final_exp_list, final_siRNA_list): the experiments of the chosen siRNAs,
    and lists where the gene and the siRNA are repeated once per experiment of the chosen siRNA.
    '''
    # NOTE: pickle executes arbitrary code on load, only use it on trusted files
    f=open('../data/ANCIENTmitocheck_exp_hitlist.pkl')
    mito_hitexp=list(set(pickle.load(f)))
    f.close()
    
    f=open(os.path.join(folder, 'MITO_pheno_scores_NOTVAL.pkl'))
    r=pickle.load(f);f.close()
    # {experiment id: scores of this experiment}
    big_phenoscore=dict(zip(r[1], r[0]))

    f=open(os.path.join(folder, 'MITO_pheno_scores_VAL.pkl'))
    r=pickle.load(f);f.close()
    val_phenoscore=dict(zip(r[1], r[0]))

    yqualdict=expSi('/media/lalil0u/New/workspace2/Xb_screen/data/mapping_2014/qc_export.txt')
    dictSiEntrez=siEntrez('/media/lalil0u/New/workspace2/Xb_screen/data/mapping_2014/mitocheck_siRNAs_target_genes_Ens75.txt')
    genes_big=[]
    # gene -> siRNAs (one entry per experiment), siRNA -> first scores, siRNA -> experiments
    gene_si=defaultdict(list)
    res_siRNA=defaultdict(list)
    si_exp=defaultdict(list)
    
    unmapped_si=[]
    
    # Step 1: hit experiments of the primary screen
    for exp in mito_hitexp:
        try:
            currSi=yqualdict[exp]; currGene = dictSiEntrez[currSi]
            
        except KeyError:
            #print "{} siRNA not in mapping file anymore".format(yqualdict[exp]),
            # NOTE(review): if the KeyError comes from yqualdict[exp] itself, this line raises it again
            unmapped_si.append(yqualdict[exp])
        else:
            if exp in big_phenoscore:
                genes_big.append(currGene)
                gene_si[currGene].append(currSi)
                # Only the first score of the experiment is kept
                res_siRNA[currSi].append(big_phenoscore[exp][0])
                si_exp[currSi].append(exp)
            else:
                print "{} no data".format(exp)
                continue
    print "------------------------------------------------------Youpi next step looking at validation experiments"
    genes_big=sorted(list(set(genes_big)))
            
    # Step 2: all the validation experiments that passed quality control
    yqualdict2=expSi('/media/lalil0u/New/workspace2/Xb_screen/data/mapping_2014/qc_validation_exp.txt', primary_screen=False)
    genes_big2=[]
    for exp in yqualdict2:
        try:
            currSi=yqualdict2[exp]; currGene = dictSiEntrez[currSi]
        except KeyError:
            # 'empty' wells are not counted as unmapped siRNAs
            if yqualdict2[exp]!='empty':
#                print "{} siRNA not in mapping file anymore".format(yqualdict2[exp]),
                unmapped_si.append(yqualdict2[exp])
        else:
            if exp in val_phenoscore:
                genes_big2.append(currGene)
                gene_si[currGene].append(currSi)
                res_siRNA[currSi].append(val_phenoscore[exp][0])
                si_exp[currSi].append(exp)
            else:
                print "{} no data".format(exp)
                continue
            
    genes_big2=sorted(list(set(genes_big2)))
    
    print "Unmapped siRNAs ", len(unmapped_si)
    
    print "How many genes do we gain by using validation experiments?"
    # NOTE(review): this counts the genes of the primary screen that are absent from the
    # validation experiments, which looks like the opposite of the question - confirm
    print len([el for el in genes_big if el not in genes_big2])
    
    genes_big.extend(genes_big2)
    genes_big=sorted(list(set(genes_big)))
    print "How many genes finally ", len(genes_big)
    final_siRNA_list=[]
    final_exp_list=[]
    genesL=[]
    count_siRNA_total=0
    # Step 3: choice of one siRNA per gene
    for gene in genes_big:
        currSiL=gene_si[gene]
        # Number of experiments recorded for each siRNA of the gene
        counts=Counter(currSiL)
        count_siRNA_total+=len(counts)
        currRes=[]; siL=[]
        # siRNAs with a single experiment are not considered
        for siRNA in filter(lambda x: counts[x]>=2, counts):
            currRes.append(np.median(res_siRNA[siRNA]))
            siL.append(siRNA)

        # Genes without any siRNA having two experiments are left out of the result
        if currRes!=[]: 
            choice=np.array(siL)[np.argmin(np.array(currRes))]
            final_exp_list.extend(si_exp[choice])
            final_siRNA_list.extend([choice for k in range(counts[choice])])
            genesL.extend([gene for k in range(counts[choice])])
    print "How many siRNAs in total ", count_siRNA_total
    return genesL, final_exp_list, final_siRNA_list
예제 #10
0
def condition_cluster_inference(M, clusters, who_hits, exposure_hits, who_Mitocheck, num_permutations, threshold, random_result, filename, 
                         taking_siRNAs=True, gsea=False):
    '''
    - M: distance matrix of size (hits, mitocheck)
    - taking_siRNAs: indicates if you want to consider different values of the same siRNAs independently (False) or as replicates of the same condition
    - num_permutations: no calculation of p-values if None, else number of permutations
    - filename if we want to write GSEA ranking files
    - gsea : if you want to write gsea ranking files
'''
    r={}
    yqualdict=expSi('../data/mapping_2014/qc_export.txt')
    yqualdict.update(expSi('../data/mapping_2014/qc_validation_exp.txt', primary_screen=False))
    dictSiEntrez=siEntrez('../data/mapping_2014/mitocheck_siRNAs_target_genes_Ens75.txt')
    trad=EnsemblEntrezTrad('../data/mapping_2014/mitocheck_siRNAs_target_genes_Ens75.txt')
    
    siRNAs=[yqualdict[e] for e in who_Mitocheck]
    genes=[dictSiEntrez[e] for e in siRNAs]
    past_cluster_num=0
    for cluster_num in clusters:
        print cluster_num
        r[cluster_num]={'conditions':clusters[cluster_num]}
        where_=np.hstack((np.where(exposure_hits==el)[0] for el in clusters[cluster_num]))
        
        batch_names = who_hits[where_]
        
        curr_dist=np.hstack((M[j] for j in where_))
        curr_who=[(batch_names[0], '') for k in range(M.shape[1])]
        
        curr_conditions=list(who_Mitocheck) if not taking_siRNAs else list(genes)
        for name in batch_names[1:]:
            curr_who.extend([(name, '') for k in range(M.shape[1])])
            if not taking_siRNAs:
                curr_conditions.extend(who_Mitocheck)
            else:
                curr_conditions.extend(genes)
        print curr_dist.shape
        if num_permutations is not None and past_cluster_num!=len(batch_names):
    #If there are only five experiments that are different between the two cluster length, then no need to redo the random rank product computation
            random_result=None
        curr_res=computeRPpvalues(curr_dist, np.array(curr_who), 
                                                       conditions=np.array(curr_conditions), 
                                                       technical_replicates_key=np.median,
                                                       xb_screen=False, 
                                                         num_permutations=num_permutations, reverse=False, 
                                                         batch_names=batch_names, random_result=random_result,
                                                         signed=False)
        #donc curr_res est [(gene, rank value)]
        if num_permutations is None and gsea:
            writeGSEARankingFile(curr_res, filename.format(cluster_num))
            
        else:
            if len(curr_res)==2: 
    #this means that we have the p-values
                curr_res, random_result=curr_res
                curr_res=sorted(curr_res, key=itemgetter(-1))
                pval=np.array(np.array(curr_res)[:,-1], dtype=float)
                if not taking_siRNAs:
                    currG=[dictSiEntrez[yqualdict[e]] for e in np.array(curr_res)[np.where(pval<=threshold)][:,0]]
                else:
                    currG=[e for e in np.array(curr_res)[np.where(pval<=threshold)][:,0]]
                print sorted(currG)
            else:
    #this means that we're working with the rank product values
                curr_res=sorted(curr_res, key=itemgetter(-1))
                if not taking_siRNAs:
                    currG=[dictSiEntrez[yqualdict[e]] for e in np.array(curr_res)[:threshold][:,0]]
                else:
                    currG=[e for e in np.array(curr_res)[:threshold][:,0]]
                print sorted(currG)
                
            past_cluster_num=len(batch_names)
        
        r[cluster_num]['genes']=[trad[el] for el in currG]
        
        r[cluster_num]['result']=[(el[0],el[-1]) for el in curr_res]
    multipleGeneListsToFile([r[k]['genes'] for k in r], [k for k in r], name=filename)
    
    return r
예제 #11
0
def globalSummaryScript(baseName,  siRNAFile,
                        n_clusters_min, n_clusters_max,
                       div_name,  lambda_,  weights, 
                       bins_type,  bin_size,  cost_type,
                       batch_size,  n_init,  init, 
                       ddim, iter_=0):
    '''
    Write the shell scripts of a two-step cluster job and print the qsub commands to launch
    them (nothing is submitted here):
    - one script per experiment of the siRNAs of siRNAFile and per selected control, whose
      command is given by plateWellSummaryScript, plus the array script that calls them
    - one clustering script per number of clusters in range(n_clusters_min, n_clusters_max)
      (n_clusters_max excluded), plus the array script that calls them

    - baseName: prefix of the script names, completed with the parameters
    - siRNAFile: pickle containing the list of siRNAs
    - div_name, lambda_, weights, bins_type, bin_size, cost_type, batch_size, n_init, init,
      ddim, iter_: passed to plateWellSummaryScript; all but lambda_ and n_init are also put
      on the command line of the clustering scripts

    The list of all experiments is pickled in exp_Simpson_<timestamp>.pkl in the current
    folder. Folders and cluster settings are read from module-level names. Returns 1.
    '''
    
    # NOTE: pickle executes arbitrary code on load, only use it on trusted files
    f=open(siRNAFile, 'r')
    siRNAList = pickle.load(f); f.close()
    
    # With sens=0 the dictionary is read as {siRNA: experiments}
    siExpDict = expSi(qc = quality_control_file , sens=0)
    jobCount = 0
    i=0
    total_expList = []
    head = """#!/bin/sh
cd %s""" %progFolder
    baseName = baseName+'{}{}_w{}_{}_{}_{}'.format(iter_,div_name[:5], weights, bins_type, bin_size, cost_type)
#A. DEALING WITH EXPERIMENTS
    for siRNA in siRNAList:
        try:
            expList = siExpDict[siRNA]
        except KeyError:
            print "siRNA not in siRNA-experiment dictionary"
        else:
            expList = strToTuple(expList, os.listdir(data_folder))
            total_expList.extend(expList)
            for plate, well in expList:        
                # jobCount and i are incremented together, scripts are numbered from 1
                jobCount += 1; i+=1
                cmd = plateWellSummaryScript(plate, well, div_name, lambda_, weights, bins_type, bin_size, cost_type, batch_size, n_init, init, ddim, iter_)

                # this is now written to a script file (simple text file)
                # the script file is called ltarray<x>.sh, where x is 1, 2, 3, 4, ... and corresponds to the job index.
                script_name = os.path.join(scriptFolder, baseName+'{}.sh'.format(i))
                script_file = file(script_name, "w")
                script_file.write(head + cmd)
                script_file.close()
        
                # make the script executable (without this, the cluster node cannot call it)
                os.system('chmod a+x %s' % script_name)
    
#B. DEALING WITH CONTROLS
    ctrlExp = appendingControl(total_expList)
    ctrlExp = countingDone(ctrlExp)
    # Random selection of controls, 20% of the number of experiments at most
    np.random.shuffle(ctrlExp)
    ctrlExp=ctrlExp[:int(0.2*len(total_expList))]
    for plate, well in ctrlExp:
        jobCount += 1; i+=1
        cmd = plateWellSummaryScript(plate, well, div_name, lambda_, weights, bins_type, bin_size, cost_type, batch_size, n_init, init, ddim, iter_)

        # this is now written to a script file (simple text file)
        # the script file is called ltarray<x>.sh, where x is 1, 2, 3, 4, ... and corresponds to the job index.
        script_name = os.path.join(scriptFolder, baseName+'{}.sh'.format(i))
        script_file = file(script_name, "w")
        script_file.write(head + cmd)
        script_file.close()

        # make the script executable (without this, the cluster node cannot call it)
        os.system('chmod a+x %s' % script_name)
    
            # write the main script
    array_script_name = '%s.sh' % os.path.join(scriptFolder, baseName)
    main_script_file = file(array_script_name, 'w')
    # The last line of the main script calls <baseName>$<pbsArrayEnvVar>.sh
    main_content = """#!/bin/sh
%s
#$ -o %s
#$ -e %s
%s$%s.sh
""" % (path_command,
       pbsOutDir,  
       pbsErrDir, 
       os.path.join(scriptFolder, baseName),
       pbsArrayEnvVar)

    main_script_file.write(main_content)
    main_script_file.close()
    os.system('chmod a+x %s' % array_script_name)
    sub_cmd = 'qsub -t 1-%i %s' % (jobCount, array_script_name)

    print sub_cmd
    
#C. DOING EXPERIMENT CLUSTERING STEP
    expFilename = 'exp_Simpson_{}.pkl'.format(int(time.time()))
    total_expList.extend(ctrlExp)
    f=open(expFilename, 'w')
    pickle.dump(total_expList, f)
    f.close()
    baseName = baseName+'_clustering'
    for n_clusters in range(n_clusters_min, n_clusters_max):
        # NOTE(review): these scripts are numbered from 0 whereas the qsub command printed below
        # asks for tasks 1 to N. If the array variable holds the task number, script 0 is never
        # run and script N does not exist - confirm
        script_name = os.path.join(scriptFolder, baseName+'{}.sh'.format(n_clusters-n_clusters_min))
        script_file = file(script_name, "w")
        cmd="""
    python tracking/histograms/summarization_clustering.py -a clustering --experimentFile %s -k %i --ddimensional %i --bins_type %s --cost_type %s --bin_size %i --div_name %s -w %i --init %s --batch_size %i --iter %i
    """
        cmd %= (
                expFilename,
                n_clusters,
                 ddim,
                 bins_type,
                 cost_type,
                 bin_size,
                 div_name,
                 weights,
                 init,
                 batch_size,
                 iter_
            )
        script_file.write(head + cmd)
        script_file.close()
        os.system('chmod a+x %s' % script_name)
    
                # write the main script
    array_script_name = '%s.sh' % os.path.join(scriptFolder, baseName)
    main_script_file = file(array_script_name, 'w')
    main_content = """#!/bin/sh
%s
#$ -o %s
#$ -e %s
%s$%s.sh
""" % (path_command,
       pbsOutDir,  
       pbsErrDir, 
       os.path.join(scriptFolder, baseName),
       pbsArrayEnvVar)

    main_script_file.write(main_content)
    main_script_file.close()
    os.system('chmod a+x %s' % array_script_name)
    # NOTE(review): -hold_jid is printed without any job id, presumably to be completed by hand
    # with the id of the first array job - confirm
    sub_cmd = 'qsub -hold_jid  -t 1-%i %s' % (n_clusters_max - n_clusters_min, array_script_name)

    print sub_cmd
    
#D. GOING BACK TO EXPERIMENTS AND TESTING IF DIFFERENT FROM CONTROLS
    
    return 1
예제 #12
0
def hitFinderScript(baseName, siRNAFile, testCtrl=False, iter_=0):
    f=open(siRNAFile, 'r')
    siRNAList = pickle.load(f); f.close()
    jobCount = 0
    head = """#!/bin/sh
cd %s""" %progFolder
    baseName+='{}'.format(iter_)
    if testCtrl:
        baseName = baseName+'CTRL'
        expList = []
        yqualDict=expSi(quality_control_file, sens=0)
        for siRNA in siRNAList:
            expList.extend(strToTuple(yqualDict[siRNA], os.listdir(data_folder)))
        plates = Counter(np.array(expList)[:,0]).keys()
        for i,plate in enumerate(plates):
            jobCount+=1; i+=1
            cmd = '''
python tracking/histograms/summarization_clustering.py -a hitFinder --verbose 0 --testCtrl %s --iter %i
    '''
            cmd%=(plate, iter_)
            # this is now written to a script file (simple text file)
            # the script file is called ltarray<x>.sh, where x is 1, 2, 3, 4, ... and corresponds to the job index.
            script_name = os.path.join(scriptFolder, baseName+'{}.sh'.format(i))
            script_file = file(script_name, "w")
            script_file.write(head + cmd)
            script_file.close()
    
            # make the script executable (without this, the cluster node cannot call it)
            os.system('chmod a+x %s' % script_name)
    
    else:
        for i,siRNA in enumerate(siRNAList):
            jobCount+=1; i+=1
            cmd = '''
python tracking/histograms/summarization_clustering.py -a hitFinder --siRNA %s --verbose 0 --iter %i
    '''
            cmd%=(siRNA, iter_)
            # this is now written to a script file (simple text file)
            # the script file is called ltarray<x>.sh, where x is 1, 2, 3, 4, ... and corresponds to the job index.
            script_name = os.path.join(scriptFolder, baseName+'{}.sh'.format(i))
            script_file = file(script_name, "w")
            script_file.write(head + cmd)
            script_file.close()
    
            # make the script executable (without this, the cluster node cannot call it)
            os.system('chmod a+x %s' % script_name)
            
    # write the main script
    array_script_name = '%s.sh' % os.path.join(scriptFolder, baseName)
    main_script_file = file(array_script_name, 'w')
    main_content = """#!/bin/sh
%s
#$ -o %s
#$ -e %s
%s$%s.sh
""" % (path_command,
       pbsOutDir,  
       pbsErrDir, 
       os.path.join(scriptFolder, baseName),
       pbsArrayEnvVar)

    main_script_file.write(main_content)
    main_script_file.close()
    os.system('chmod a+x %s' % array_script_name)
    sub_cmd = 'qsub -t 1-%i %s' % (jobCount, array_script_name)

    print sub_cmd
        
    return