Пример #1
0
def storeMccTotalEner():
    """Run extractMccTotalEner on every entry of a list file.

    The list file name is taken from the first command-line argument
    (sys.argv[1]) and read with getLst; the entries are then processed one
    after another, in the order getLst returns them.
    """
    import sys

    for entry in getLst(sys.argv[1]):
        extractMccTotalEner(entry)
Пример #2
0
def lstCmp(path1, path2):
    """Compare the entries of two list files.

    Both files are read with getLst.

    Arguments:
    - `path1`: name of the first list file
    - `path2`: name of the second list file

    Returns a tuple (common, unique1, unique2):
    - common: entries of the first list that also occur in the second
    - unique1: entries of the first list that do not occur in the second
    - unique2: entries of the second list that do not occur in the first
    Each result keeps the order (and any duplicates) of the list it was
    taken from.
    """
    list1 = getLst(path1)
    list2 = getLst(path2)

    # Membership is tested against sets so the comparison is linear instead
    # of quadratic in the list lengths.
    # NOTE(review): assumes the entries are hashable (e.g. strings) - confirm
    # against getLst.
    seen1 = set(list1)
    seen2 = set(list2)

    common = [item for item in list1 if item in seen2]
    unique1 = [item for item in list1 if item not in seen2]
    unique2 = [item for item in list2 if item not in seen1]

    return common, unique1, unique2
Пример #3
0
def subZ_ScoreSvr_tasks():
    """Submit the loose grid jobs for SVR parameter selection using the z_score data set.

    The job list is read (lst_sub.getLst) from the file named by the first
    command-line argument, wrapped in a Z_Svr_tasks object, displayed and
    then submitted through subLooseGrid.
    """
    import lst_sub
    import sys

    task_file = sys.argv[1]
    z_tasks = Z_Svr_tasks(lst_sub.getLst(task_file), task_fn=task_file)
    z_tasks.displayJobs()
    z_tasks.subLooseGrid()
Пример #4
0
def xargs(lstfn, pyscpt):
    """Emulate the Linux xargs command: run a Python script once per list entry.

    Arguments:
    - `lstfn`: name of the list file (read with getLst)
    - `pyscpt`: Python script to run

    The script and the current entry are handed to the shell through the
    "script" and "task" environment variables, which stay set in os.environ
    afterwards. The function pauses 5 seconds after every run.
    """
    entries = getLst(lstfn)
    os.environ["script"] = pyscpt
    for entry in entries:
        os.environ["task"] = entry
        # NOTE(review): the shell expands $script and $task unquoted, so
        # whitespace inside either value is split into separate arguments.
        os.system("python $script $task")
        time.sleep(5)
Пример #5
0
def storeMccTotalEnerParallelly(lst_fn):
    """
    Parallel version of storeMccTotalEner using multiprocessing.

    Arguments:
    - `lst_fn`: name of the list file (read with getLst); extractMccTotalEner
      is applied to every entry through a multiprocessing Pool created with
      its default number of worker processes.

    Any exception raised by pool.map propagates to the caller.
    """
    from multiprocessing import Pool

    complx_lst = getLst(lst_fn)

    pool = Pool()
    try:
        pool.map(extractMccTotalEner, complx_lst)
    finally:
        # Shut the workers down even when a task raised, so a failing run
        # does not leave idle worker processes behind.
        pool.close()
        pool.join()
Пример #6
0
 def getIndividualPcc(self):
     """
     Calculate the PCC of each complex's pre-cluster data.

     For every complex listed in self.lstfn, the whitespace-separated energy
     table '<complex>-A-<self.ener_extd>' and the headerless file
     '<complex>-A-<self.mcc_extd>' are read and joined column-wise, the
     latter as a column named 'mcc'. The joined frame is sorted by 'mcc' and
     handed to preClusterAna.calcuPCC.
     """
     from lst_sub import getLst
     for cmplx in getLst(self.lstfn):
         ener_fn = cmplx + '-A-' + self.ener_extd
         mcc_fn = cmplx + '-A-' + self.mcc_extd
         # Raw string: '\s' is a regex escape, not a Python string escape.
         ener_dt = pd.read_csv(ener_fn, sep=r'\s+')
         mcc_dt = pd.read_csv(mcc_fn, header=None)
         mcc_dt.columns = ['mcc']
         dt = pd.concat([ener_dt, mcc_dt], axis=1)
         # DataFrame.sort(columns=...) was deprecated in pandas 0.17 and later
         # removed; sort_values(by=...) is its replacement.
         dt = dt.sort_values(by=['mcc'])
         # Single-argument print call: same output on Python 2 and 3.
         print("for the complex %s" % (cmplx,))
         preClusterAna.calcuPCC(dt)
Пример #7
0
 def constructTotalDt(self):
     """Construct the total data frame of all the complexes.

     For every complex listed in self.lstfn, the whitespace-separated energy
     table '<complex>-A-<self.ener_extd>' and the headerless file
     '<complex>-A-<self.mcc_extd>' are read and joined column-wise, the
     latter as a column named 'mcc'. The per-complex frames are stacked,
     sorted by 'mcc', relabelled with self.total_columns and stored in
     self.total_dt.
     """
     from lst_sub import getLst
     dts = []
     for cmplx in getLst(self.lstfn):
         ener_fn = cmplx + '-A-' + self.ener_extd
         mcc_fn = cmplx + '-A-' + self.mcc_extd
         # Raw string: '\s' is a regex escape, not a Python string escape.
         ener_dt = pd.read_csv(ener_fn, sep=r'\s+')
         mcc_dt = pd.read_csv(mcc_fn, header=None)
         mcc_dt.columns = ['mcc']
         dt = pd.concat([ener_dt, mcc_dt], axis=1)
         # DataFrame.sort(columns=...) was deprecated in pandas 0.17 and later
         # removed; sort_values(by=...) is its replacement.
         dts.append(dt.sort_values(by=['mcc']))
     self.total_dt = pd.concat(dts)
     self.total_dt = self.total_dt.sort_values(by=['mcc'])
     self.total_dt.columns = self.total_columns
Пример #8
0
def getReps_bk(clustering_rslt_file):
    """
    Read the clustering results, return the representatives from each cluster.

    The file is read with getLst; entry i is parsed as the integer cluster
    label of point i (points are numbered from 0). For every label from 0 up
    to the largest label present, one member point is picked with
    random.choice.

    Arguments:
    - `clustering_rslt_file`: name of the clustering result file

    Returns a list of point indices; position g holds the representative of
    cluster g. The list is empty when the file has no entries or when no
    label is >= 0.

    Raises:
    - ValueError: an entry cannot be parsed as an integer
    - IndexError: some label between 0 and the largest label has no members
    """
    # Formatting into one string keeps the output identical on Python 2 and 3.
    print("clustering result\t%s" % (clustering_rslt_file,))

    labels = [int(entry) for entry in getLst(clustering_rslt_file)]
    if not labels:
        return []

    # Group the point indices by label in a single pass.
    groups = [[] for _ in range(max(labels) + 1)]
    for point, label in enumerate(labels):
        # Negative labels belong to no group; skip them so they cannot index
        # the list from its end.
        if label >= 0:
            groups[label].append(point)

    # One draw per group, in group order.
    return [random.choice(members) for members in groups]
Пример #9
0
import time

class svm_tasks:
    """A list of job names that can be displayed and submitted through qsub.

    Attributes set by the constructor:
    - tasks: iterable of job (subset) names
    - task_fn: name of the file the tasks came from; only used for display
    """

    def __init__(self, tasks, task_fn=' '):
        self.tasks = tasks
        self.task_fn = task_fn

    def displayJobs(self):
        """print the name of the jobs"""
        # Single-argument print calls give the same output on Python 2 and 3;
        # the double space reproduces what the former print statement wrote.
        print("get tasks from  %s" % (self.task_fn,))
        for job_name in self.tasks:
            print(job_name)

    def subFineGrid(self):
        """submit the fine grid job for svc parameter selection

        Every task is submitted with qsub using fine_grid.pbs, after a
        3 second pause. The task name is passed to the shell through the
        'subset' environment variable, which stays set in os.environ.
        """
        for subset in self.tasks:
            time.sleep(3)
            os.environ['subset'] = subset
            print("submit fine grid job:  %s" % (subset,))
            os.system('qsub -v var1=$subset -N $subset fine_grid.pbs')

if __name__ == '__main__':
    # Iterate the list of orders: read the job names from the list file named
    # by the first command-line argument, show them, then submit a fine grid
    # job for each of them.
    import lst_sub
    import sys

    filename = sys.argv[1]
    jobs = lst_sub.getLst(filename)

    tasks = svm_tasks(jobs, task_fn=filename)
    tasks.displayJobs()
    tasks.subFineGrid()
Пример #10
0
from lst_sub import getLst
import NB_classifier as nb

# Input file read with getLst into ener_rows.
ener_rows_ifn = 'ener_row_name.txt'
# Prefix shared by the distribution file names below.
ff = '08ff'
low_condi_dist_fn = ff + '_low_decoy.dist'
high_condi_dist_fn = ff + '_high_decoy.dist'
bayes_dist_ofn = ff + '_bayes.txt'

# Load the low- and high-decoy conditional distributions and the row names.
# NOTE(review): the code further down indexes each loaded entry as
# (name, (value0, value1, ...)) - confirm against nb.loadCondiDistribution.
low_condi_dist = nb.loadCondiDistribution(low_condi_dist_fn)
high_condi_dist = nb.loadCondiDistribution(high_condi_dist_fn)
ener_rows = getLst(ener_rows_ifn)

def convertPdfName(dist_tuple):
    """Convert the first letter of the distribution name to upper case.

    Arguments:
    - `dist_tuple`: sequence whose first item is the distribution name and
      whose second item is passed through unchanged

    Returns the tuple (converted name, second item). An empty name is
    returned unchanged.
    """
    name = dist_tuple[0]
    # Slicing instead of indexing keeps an empty name from raising IndexError.
    name = name.upper()[:1] + name[1:]
    return (name, dist_tuple[1])
    
################################################################################
# Capitalize the distribution name of every loaded entry.
low_condi_dist = list(map(convertPdfName, low_condi_dist))
high_condi_dist = list(map(convertPdfName, high_condi_dist))
################################################################################

# One row per energy term: [row name, distribution name, first value,
# second value]. The distribution lists are indexed by position, so a list
# shorter than ener_rows raises IndexError.
high_bayes_dists = [
    [row, high_condi_dist[i][0], high_condi_dist[i][1][0], high_condi_dist[i][1][1]]
    for i, row in enumerate(ener_rows)
]
low_bayes_dists = [
    [row, low_condi_dist[i][0], low_condi_dist[i][1][0], low_condi_dist[i][1][1]]
    for i, row in enumerate(ener_rows)
]
Пример #11
0
                    dset = f[group_path]
                    dset.attrs["prt_conf"] = prt_conf
                    dset.attrs["lig_conf"] = lig_conf
                    dset.create_dataset(str(rep_num), data=dt)
                f.flush()
                f.close()


if __name__ == "__main__":

    ################################################################################
    # Extract the track of every complex listed in the "exp_lst" file,
    # distributing the calls over a multiprocessing pool.
    from multiprocessing import Pool

    lst_fn = "exp_lst"
    complx_lst = getLst(lst_fn)

    # Serial equivalent of the pool.map call below:
    # for complx in complx_lst:
    #     extractTrack(complx)

    # Pool() without arguments uses the default number of worker processes.
    pool = Pool()
    pool.map(extractTrack, complx_lst)

    # No further work is submitted; wait for the workers to exit.
    pool.close()
    pool.join()

    ################################################################################
    # load mcc and energy components
    # lst_fn = "exp_lst"
    # complx_lst = getLst(lst_fn)
Пример #12
0
from pprint import pprint
import subprocess
from glob import glob
from lst_sub import getLst

# Report every complex whose set of '*.trace.mat' files is incomplete: a
# complex passes only when it has exactly 10 such files and `wc -l` reports
# `rows` lines for each of them.
lst_fn = 'total_lst'
rows = str(10001)

complxes = getLst(lst_fn)

incomplete_results = []

for complx in complxes:
    matxs = glob(complx + '/*.trace.mat')
    complete = len(matxs) == 10
    if complete:
        for matx in matxs:
            wc = subprocess.check_output(['wc', '-l', matx])
            # Split on any whitespace so a space-padded count is parsed too,
            # and decode so the comparison with `rows` also works when
            # check_output returns bytes (Python 3).
            total_row = wc.split()[0].decode('ascii')
            if total_row != rows:
                complete = False
                break
    if not complete:
        incomplete_results.append(complx)
        print(complx)


pprint(incomplete_results)
Пример #13
0
import sys
import multi_process
from lst_sub import getLst


# Run prepare_cluster.py once per complex listed in the file named by the
# first command-line argument.
complx_lst = sys.argv[1]
complxes = getLst(complx_lst)


# One (interpreter, script, argument) command tuple per complex.
cmds = [('python', '/home/jaydy/Workspace/script/Pyscripts/prepare_cluster.py', complx)
        for complx in complxes]

for cmd in cmds:
    # Single-argument print call: valid and identical on Python 2 and 3.
    print(cmd)
# NOTE(review): 8 is presumably the number of parallel processes - confirm
# against multi_process.multi_process.
multi_process.multi_process(cmds, 8)
Пример #14
0
        """
        f = h5py.File(self.h5_path)
        if self.non_centralized_path in f:
            del f[self.non_centralized_path]
        f.create_group(self.non_centralized_path)
        subgroup = f[self.non_centralized_path]
        subgroup.create_dataset('low_decoy', data=self.low_dt.values)
        subgroup.create_dataset('high_decoy', data=self.high_dt.values)
        
        f.flush()
        f.close()
        
if __name__ == "__main__":
    import sys
    # The first command-line argument names the list file of complexes.
    jobs_fn = sys.argv[1]
    complxes = getLst(jobs_fn)
    
    multi_decoys = MultiComplexesDecoy(complxes)
    ################################################################################
    # collect raw data
    multi_decoys.cleanH5()
    multi_decoys.vstackAll()

    ################################################################################
    # prepare for linear force field (currently disabled)
    # multi_decoys.processMccEner(centralized=True, normed=True)
    # multi_decoys.splitHighLow()
    # multi_decoys.dumpLinearMccEner()
    # multi_decoys.printMccEner()

    ################################################################################
Пример #15
0
 def loadClusteringRslts(self):
     """Load the clustering results as a list of integers into self.rslts.

     The entries are read with getLst from self.clust_rslt_fn and each one
     is converted with int().
     """
     self.rslts = getLst(self.clust_rslt_fn)
     self.rslts = list(map(int, self.rslts))