def loadResults_Mitocheck(self,exp_list):
    '''
    Load results on a per experiment basis and save one normalised phenotype
    profile per experiment. This will be interesting to look at distances
    between experiments based on phenotypes, vs distances based on trajectory types.

    Input:
    - exp_list: sequence of (plate, well) pairs. If its first element does not
      have length 2, the list is first converted with
      strToTuple(exp_list, os.listdir(self.settings.outputFolder)).

    For each experiment the file
    <outputFolder>/<plate>/<outputFile.format(plate[:10], well)> is unpickled
    and must contain a pair (pheno_seq_list, mask). Sequences whose index is in
    mask are ignored; the others are histogrammed over 17 labels, summed, the
    last two bins are dropped and the profile is divided by its sum.

    Output (pickled to <outputFolder>/<outputFile.format("ALL", "hit_exp")>):
    a pair (result, missed) where
    - result is None if no experiment could be used, a 1-D array if exactly one
      could, and otherwise a 2-D array with one row per usable experiment;
    - missed lists the positions in exp_list that produced no row (loading
      error, or no unmasked sequence).

    Returns None.
    '''
    import sys  # function-scope import: only the progress counter needs it

    if len(exp_list) > 0 and len(exp_list[0]) != 2:
        exp_list = strToTuple(exp_list, os.listdir(self.settings.outputFolder))

    profiles = []
    missed = []
    for i, (pl, w) in enumerate(exp_list):
        # Progress counter, all indices on one line.
        sys.stdout.write('%d ' % i)

        try:
            path = os.path.join(self.settings.outputFolder, pl,
                                self.settings.outputFile.format(pl[:10], w))
            # Binary mode is what pickle expects; "with" closes the file even
            # when unpickling fails.
            with open(path, 'rb') as f:
                pheno_seq_list, mask = pickle.load(f)
        except Exception:
            # Best effort: an unreadable experiment is recorded and skipped.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            print("Loading error for  %s %s" % (pl, w))
            missed.append(i)
            continue

        counts = [np.bincount(pheno_seq_list[j], minlength=17)
                  for j in range(len(pheno_seq_list)) if j not in mask]
        if not counts:
            # Summing an empty array gives a scalar that cannot be sliced, so
            # an experiment with every sequence masked is reported as missed
            # instead of aborting the whole run.
            print("No unmasked phenotype sequence for  %s %s" % (pl, w))
            missed.append(i)
            continue

        # 15 and 16 are respectively out of focus and artefact objects. We don't want them
        profile = np.sum(np.array(counts), 0)[:-2]
        profile = profile / float(np.sum(profile))
        profiles.append(profile)

    # Same shapes as stacking row after row: None, a single vector, or a matrix.
    if not profiles:
        result = None
    elif len(profiles) == 1:
        result = profiles[0]
    else:
        result = np.vstack(profiles)

    print("Saving")

    out_path = os.path.join(self.settings.outputFolder,
                            self.settings.outputFile.format("ALL", "hit_exp"))
    with open(out_path, 'wb') as f:
        pickle.dump((result, missed), f)
    return
# Example #2
# 0
def scriptCommand(exp_list, baseName='comp_track', command="tracking/trajPack/cell_cycle.py",jobSize=10,
                  h5_result_dir="/share/data20T/mitocheck/Alice/results",
                  max_nodes=500,
                   **kwargs):
    '''
    Write the shell scripts of an array job running `command` on every
    element of exp_list, and print the qsub line that submits it.

    Input:
    - exp_list: non-empty sequence of either integers (each passed to the
      command as "-i <int>") or experiments (each passed as "-p <plate> -w <well>").
      Experiments whose first element does not have length 2 are converted
      with strToTuple(exp_list, os.listdir(h5_result_dir)).
    - baseName: prefix of the generated script names.
    - command: script to call with python; every keyword argument in kwargs is
      appended to it as " -<keyword> <value>".
    - jobSize: number of elements handled by one job script (integer).
    - h5_result_dir: folder listed when experiments have to be converted.
    - max_nodes: value given to "qsub -tc".

    Files written in scriptFolder:
    - <baseName><k>.sh for k = 1..number of jobs, each made executable;
    - <baseName>.sh, the master script, whose last line is
      <scriptFolder>/<baseName>$<pbsArrayEnvVar>.sh
      (presumably pbsArrayEnvVar holds the array task id -- confirm).

    Returns 1.
    '''
    perExperiment = not isinstance(exp_list[0], int)
    if perExperiment and len(exp_list[0]) != 2:
        exp_list = strToTuple(exp_list, os.listdir(h5_result_dir))

    # Ceiling division: a length that is an exact multiple of jobSize must not
    # produce one extra job script that contains no command.
    fileNumber = (len(exp_list) + jobSize - 1) // jobSize

    head = "#!/bin/sh\ncd %s" % progFolder
    for keyword in kwargs:
        command = command + " -{} {}".format(keyword, kwargs[keyword])

    for k in range(fileNumber):
        batch = exp_list[jobSize*k:jobSize*(k+1)]
        if perExperiment:
            lines = ["\npython %s -p %s -w %s" % (command, pl, w) for pl, w in batch]
        else:
            lines = ["\npython %s -i %i" % (command, exp) for exp in batch]

        # One plain-text script per job, called <baseName><x>.sh where
        # x = 1, 2, 3, ... is the job index.
        script_name = os.path.join(scriptFolder, '%s%i.sh' % (baseName, k+1))
        with open(script_name, "w") as script_file:
            script_file.write(head + "".join(lines))

        # make the script executable (without this, the cluster node cannot call it);
        # same effect as "chmod a+x" but without going through a shell
        os.chmod(script_name, os.stat(script_name).st_mode | 0o111)

    # write the main script
    array_script_name = '%s.sh' % os.path.join(scriptFolder, baseName)
    main_content = (
        "#!/bin/sh\n"
        "%s\n"
        "#$ -o %s\n"
        "#$ -e %s\n"
        "%s$%s.sh\n"
    ) % (path_command,
         pbsOutDir,
         pbsErrDir,
         os.path.join(scriptFolder, baseName),
         pbsArrayEnvVar)

    # The file is closed (hence flushed) before its mode is changed.
    with open(array_script_name, 'w') as main_script_file:
        main_script_file.write(main_content)
    os.chmod(array_script_name, os.stat(array_script_name).st_mode | 0o111)

    sub_cmd = 'qsub -tc %i -t 1-%i %s' % (max_nodes, fileNumber, array_script_name)

    print(sub_cmd)
    return 1
def globalSummaryScript(baseName,  siRNAFile,
                        n_clusters_min, n_clusters_max,
                       div_name,  lambda_,  weights, 
                       bins_type,  bin_size,  cost_type,
                       batch_size,  n_init,  init, 
                       ddim, iter_=0):
    '''
    Write the scripts of two array jobs (per-experiment summaries, then
    clustering) and print the qsub line of each.

    Input:
    - baseName: prefix of the script names; it is extended with
      iter_, div_name[:5], weights, bins_type, bin_size and cost_type.
    - siRNAFile: pickle file containing the list of siRNAs to process.
    - n_clusters_min, n_clusters_max: one clustering script is written for
      each k in range(n_clusters_min, n_clusters_max).
    - all other arguments are forwarded to plateWellSummaryScript and/or
      written on the clustering command line.

    Steps:
    A. for every siRNA found in expSi(qc=quality_control_file, sens=0), one
       job script per (plate, well) experiment;
    B. one job script per control experiment, for a random subset of the
       controls returned by countingDone(appendingControl(...)) whose size is
       20% of the number of experiments of step A;
    C. the experiment list (A + B) is pickled to exp_Simpson_<timestamp>.pkl
       in the current working directory, and one clustering script is written
       per number of clusters.

    Every job script is written to scriptFolder and made executable. Job
    scripts are numbered from 1 in both array jobs, matching the
    "qsub -t 1-N" ranges that are printed.

    Returns 1.
    '''
    with open(siRNAFile, 'rb') as f:
        siRNAList = pickle.load(f)

    siExpDict = expSi(qc = quality_control_file , sens=0)
    head = "#!/bin/sh\ncd %s" % progFolder
    summaryBase = baseName+'{}{}_w{}_{}_{}_{}'.format(iter_,div_name[:5], weights, bins_type, bin_size, cost_type)

    def make_executable(path):
        # Same effect as "chmod a+x" (without this, the cluster node cannot
        # call the script), but without going through a shell.
        os.chmod(path, os.stat(path).st_mode | 0o111)

    def write_summary_script(job_index, plate, well):
        # One plain-text script <summaryBase><job_index>.sh per experiment.
        cmd = plateWellSummaryScript(plate, well, div_name, lambda_, weights, bins_type, bin_size, cost_type, batch_size, n_init, init, ddim, iter_)
        script_name = os.path.join(scriptFolder, summaryBase+'{}.sh'.format(job_index))
        with open(script_name, "w") as script_file:
            script_file.write(head + cmd)
        make_executable(script_name)

    def write_array_script(name):
        # Master script: its last line is <scriptFolder>/<name>$<pbsArrayEnvVar>.sh
        array_script_name = '%s.sh' % os.path.join(scriptFolder, name)
        main_content = (
            "#!/bin/sh\n"
            "%s\n"
            "#$ -o %s\n"
            "#$ -e %s\n"
            "%s$%s.sh\n"
        ) % (path_command,
             pbsOutDir,
             pbsErrDir,
             os.path.join(scriptFolder, name),
             pbsArrayEnvVar)
        with open(array_script_name, 'w') as main_script_file:
            main_script_file.write(main_content)
        make_executable(array_script_name)
        return array_script_name

#A. DEALING WITH EXPERIMENTS
    jobCount = 0
    total_expList = []
    for siRNA in siRNAList:
        try:
            expList = siExpDict[siRNA]
        except KeyError:
            print("siRNA not in siRNA-experiment dictionary")
            continue

        expList = strToTuple(expList, os.listdir(data_folder))
        total_expList.extend(expList)
        for plate, well in expList:
            jobCount += 1
            write_summary_script(jobCount, plate, well)

#B. DEALING WITH CONTROLS
    ctrlExp = appendingControl(total_expList)
    ctrlExp = countingDone(ctrlExp)
    np.random.shuffle(ctrlExp)
    ctrlExp=ctrlExp[:int(0.2*len(total_expList))]
    for plate, well in ctrlExp:
        jobCount += 1
        write_summary_script(jobCount, plate, well)

    # write the main script
    array_script_name = write_array_script(summaryBase)
    sub_cmd = 'qsub -t 1-%i %s' % (jobCount, array_script_name)

    print(sub_cmd)

#C. DOING EXPERIMENT CLUSTERING STEP
    expFilename = 'exp_Simpson_{}.pkl'.format(int(time.time()))
    total_expList.extend(ctrlExp)
    # NOTE(review): written relative to the current working directory while the
    # job scripts first "cd" to progFolder -- confirm both are the same folder.
    with open(expFilename, 'wb') as f:
        pickle.dump(total_expList, f)

    clusteringBase = summaryBase+'_clustering'
    cmd_template = (
        "\n    python tracking/histograms/summarization_clustering.py -a clustering"
        " --experimentFile %s -k %i --ddimensional %i --bins_type %s --cost_type %s"
        " --bin_size %i --div_name %s -w %i --init %s --batch_size %i --iter %i\n    "
    )
    # Scripts are numbered from 1 so that they match the task range 1-N of the
    # array job below (numbering from 0 left script 0 unused and task N
    # without a script).
    for job_index, n_clusters in enumerate(range(n_clusters_min, n_clusters_max), 1):
        script_name = os.path.join(scriptFolder, clusteringBase+'{}.sh'.format(job_index))
        cmd = cmd_template % (
                expFilename,
                n_clusters,
                ddim,
                bins_type,
                cost_type,
                bin_size,
                div_name,
                weights,
                init,
                batch_size,
                iter_
            )
        with open(script_name, "w") as script_file:
            script_file.write(head + cmd)
        make_executable(script_name)

    # write the main script
    array_script_name = write_array_script(clusteringBase)
    # NOTE(review): no job id follows -hold_jid in this string; presumably the
    # id of the first array job is inserted by hand before submitting -- confirm.
    sub_cmd = 'qsub -hold_jid  -t 1-%i %s' % (n_clusters_max - n_clusters_min, array_script_name)

    print(sub_cmd)

#D. GOING BACK TO EXPERIMENTS AND TESTING IF DIFFERENT FROM CONTROLS

    return 1
def hitFinderScript(baseName, siRNAFile, testCtrl=False, iter_=0):
    '''
    Write the scripts of an array job calling
    tracking/histograms/summarization_clustering.py -a hitFinder, and print
    the qsub line that submits it.

    Input:
    - baseName: prefix of the script names; str(iter_) is appended to it, then
      'CTRL' when testCtrl is true.
    - siRNAFile: pickle file containing the list of siRNAs.
    - testCtrl: if true, one job is written per distinct plate among the
      experiments of the listed siRNAs (looked up in
      expSi(quality_control_file, sens=0)) and run with "--testCtrl <plate>";
      otherwise one job is written per siRNA and run with "--siRNA <siRNA>".
    - iter_: value given to "--iter".

    Job scripts <baseName><i>.sh (i starting at 1) and the master script
    <baseName>.sh are written to scriptFolder and made executable.

    Returns None.
    '''
    with open(siRNAFile, 'rb') as f:
        siRNAList = pickle.load(f)

    head = "#!/bin/sh\ncd %s" % progFolder
    baseName+='{}'.format(iter_)

    if testCtrl:
        baseName = baseName+'CTRL'
        expList = []
        yqualDict=expSi(quality_control_file, sens=0)
        for siRNA in siRNAList:
            expList.extend(strToTuple(yqualDict[siRNA], os.listdir(data_folder)))

        # Distinct plates in order of first appearance: deterministic, and an
        # empty experiment list gives no job instead of an IndexError.
        plates = []
        seen = set()
        for exp in expList:
            plate = exp[0]
            if plate not in seen:
                seen.add(plate)
                plates.append(plate)

        template = (
            "\npython tracking/histograms/summarization_clustering.py"
            " -a hitFinder --verbose 0 --testCtrl %s --iter %i\n    "
        )
        commands = [template % (plate, iter_) for plate in plates]
    else:
        template = (
            "\npython tracking/histograms/summarization_clustering.py"
            " -a hitFinder --siRNA %s --verbose 0 --iter %i\n    "
        )
        commands = [template % (siRNA, iter_) for siRNA in siRNAList]

    for i, cmd in enumerate(commands, 1):
        # One plain-text script per job, called <baseName><i>.sh where
        # i = 1, 2, 3, ... is the job index.
        script_name = os.path.join(scriptFolder, baseName+'{}.sh'.format(i))
        with open(script_name, "w") as script_file:
            script_file.write(head + cmd)

        # make the script executable (without this, the cluster node cannot call it);
        # same effect as "chmod a+x" but without going through a shell
        os.chmod(script_name, os.stat(script_name).st_mode | 0o111)
    jobCount = len(commands)

    # write the main script
    array_script_name = '%s.sh' % os.path.join(scriptFolder, baseName)
    main_content = (
        "#!/bin/sh\n"
        "%s\n"
        "#$ -o %s\n"
        "#$ -e %s\n"
        "%s$%s.sh\n"
    ) % (path_command,
         pbsOutDir,
         pbsErrDir,
         os.path.join(scriptFolder, baseName),
         pbsArrayEnvVar)

    with open(array_script_name, 'w') as main_script_file:
        main_script_file.write(main_content)
    os.chmod(array_script_name, os.stat(array_script_name).st_mode | 0o111)
    sub_cmd = 'qsub -t 1-%i %s' % (jobCount, array_script_name)

    print(sub_cmd)

    return