Exemplo n.º 1
0
def dataer(Dataqueue):
    """Partition the training set into per-cluster subsets and queue them.

    The set named by ``GPARAMS.Compute_setting.Traininglevel`` is loaded,
    filtered through ``Check_MSet`` and saved back (the ``HF_resp`` set gets
    the same treatment when ``GPARAMS.Esoinn_setting.Ifresp`` is set).
    Every molecule's EGCM is min-max scaled with
    ``GPARAMS.Esoinn_setting.scalemin`` / ``scalemax`` and the molecule is
    appended to the subsets chosen by ``Model.find_closest_cluster``; while
    the model has fewer classes than ``Modelnumperpoint`` every molecule goes
    into every subset.  Each subset is then topped up with a random sample
    taken from the other subsets, saved, and put on ``Dataqueue`` as
    ``(subset, cluster_index, GPARAMS.Train_setting.Maxsteps)``.

    Args:
        Dataqueue: queue-like object; only its ``put`` method is used.
    """
    from TensorMol import MSet

    Trainingset = MSet(GPARAMS.Compute_setting.Traininglevel)
    Trainingset.Load()
    Trainingset.mols = Check_MSet(Trainingset.mols, level=1)
    Trainingset.Save()
    if GPARAMS.Esoinn_setting.Ifresp:
        respset = MSet('HF_resp')
        respset.Load()
        respset.mols = Check_MSet(respset.mols, level=1)
        respset.Save()
    print("Trainingset.mols :", len(Trainingset.mols))

    model = GPARAMS.Esoinn_setting.Model
    models_per_point = GPARAMS.Train_setting.Modelnumperpoint
    ClusNum = max(model.class_id, models_per_point)
    print("++++++++++++++++++Dataer++++++++++++++++++++++")
    print("ClusNum:", ClusNum)
    SubTrainList = [
        MSet(GPARAMS.Compute_setting.Traininglevel + '_Cluster%d' % i)
        for i in range(ClusNum)
    ]

    def _scaled(raw):
        # Min-max scaling with the stored scale factors.
        scalemin = GPARAMS.Esoinn_setting.scalemin
        return (raw - scalemin) / (GPARAMS.Esoinn_setting.scalemax - scalemin)

    print('start make cluster for training set')
    for mol in Trainingset.mols:
        try:
            EGCM = _scaled(mol.EGCM)
        except (AttributeError, TypeError, ValueError):
            # Missing or unusable cached descriptor: recompute it.  Anything
            # else (e.g. KeyboardInterrupt) is no longer swallowed.
            EGCM = _scaled(mol.Cal_EGCM())
        # Division by a zero range yields inf/nan; those entries are zeroed.
        EGCM[~np.isfinite(EGCM)] = 0
        if model.class_id >= models_per_point:
            # In this branch min(Modelnumperpoint, class_id) is
            # Modelnumperpoint, so it is passed directly.
            cluster_ids = model.find_closest_cluster(models_per_point, EGCM)
        else:
            cluster_ids = range(models_per_point)
        for j in cluster_ids:
            SubTrainList[j].mols.append(mol)

    for i, subset in enumerate(SubTrainList):
        print("Cluster %d has %d mols" % (i, len(subset.mols)))

    for i, subset in enumerate(SubTrainList):
        # NOTE(review): subsets with a lower index have already been topped
        # up at this point, so their sampled molecules are part of this pool
        # too -- confirm that this order dependence is intended.
        othermollist = []
        for j, other in enumerate(SubTrainList):
            if j != i and len(other.mols) > 2:
                othermollist += other.mols
        print("Other mol list for Cluster %d" % i, len(othermollist))
        if othermollist:
            samplenum = min(
                math.ceil((len(Trainingset.mols) - len(subset.mols)) *
                          GPARAMS.Esoinn_setting.Mixrate),
                len(othermollist),
            )
            print(len(othermollist), samplenum)
            subset.mols += random.sample(othermollist, samplenum)
        subset.Save()

    for i, subset in enumerate(SubTrainList):
        Dataqueue.put((subset, i, GPARAMS.Train_setting.Maxsteps))
        print('%dth cluster is put in queue, mol num: %d!' %
              (i, len(subset.mols)))
Exemplo n.º 2
0
def consumer(Queue):
    """Collect queued molecules into the stage's "Newadded" set and save it.

    Batches are read from ``Queue`` until a ``None`` sentinel arrives.  The
    first element of every batch entry is a molecule; it is renamed and
    appended to the set ``Stage_<Trainstage>_Newadded``.  After the queue is
    drained the set is filtered through ``Check_MSet`` and, when it holds
    more than 1000 molecules, down-sampled before the final save.

    Args:
        Queue: queue-like object; only its blocking ``get`` method is used.
    """
    import time
    from ..Base import Molnew
    import os
    from TensorMol import MSet
    print("Consumer start")
    Newaddedset = MSet('Stage_%d_Newadded' % GPARAMS.Train_setting.Trainstage)
    num = 0
    while True:
        ERROR_mols = Queue.get()
        # Identity test: "== None" would be evaluated element-wise (and then
        # fail inside "if") should a batch ever be a numpy array.
        if ERROR_mols is None:
            break
        for entry in ERROR_mols:
            # NOTE(review): num counts batches, so all molecules of one
            # batch receive the same name -- confirm this is acceptable.
            entry[0].name = "Stage_%d_Mol_%d" % (
                GPARAMS.Train_setting.Trainstage, num)
            Newaddedset.mols.append(entry[0])
        num += 1
        # Periodic checkpoint of what has been collected so far.
        if num % 2000 == 0:
            Newaddedset.Save()

    Newaddedset.mols = Check_MSet(Newaddedset.mols)
    model = GPARAMS.Esoinn_setting.Model
    if len(model.nodes) != 0 and \
            model.class_id > GPARAMS.Train_setting.Modelnumperpoint:
        Dataset = []
        for mol in Newaddedset.mols:
            try:
                Dataset.append(mol.EGCM)
            except AttributeError:
                # No cached descriptor on this molecule yet.
                Dataset.append(mol.Cal_EGCM())
        # NOTE(review): the descriptors are passed to predict() unscaled --
        # verify that this matches how the model was fitted.
        # predict() returns five values; only the last one (one label per
        # molecule) is used here.
        _, _, _, _, signalmask = model.predict(Dataset)
        normalmollist = []
        edgemollist = []
        noisemollist = []
        for mol, label in zip(Newaddedset.mols, signalmask):
            if label == 'Noise':
                noisemollist.append(mol)
            elif label == 'Edge':
                edgemollist.append(mol)
            elif label == 'Normal':
                normalmollist.append(mol)
        if len(Newaddedset.mols) > 1000:
            # Per-label caps: 600 edge, 200 noise, 20 normal molecules.
            edgemollist = random.sample(edgemollist,
                                        min(600, len(edgemollist)))
            noisemollist = random.sample(noisemollist,
                                         min(200, len(noisemollist)))
            normalmollist = random.sample(normalmollist,
                                          min(20, len(normalmollist)))
            Newaddedset.mols = edgemollist + noisemollist + normalmollist
    elif len(Newaddedset.mols) > 1000:
        Newaddedset.mols = random.sample(Newaddedset.mols, 1000)
    Newaddedset.Save()
    return
Exemplo n.º 3
0
def parallel_caljob(MSetname, manager, ctrlfile):
    """Run the QM calculation for every molecule of the set ``MSetname``.

    With ``GPARAMS.Train_setting.Ifwithhelp`` set, the set is split into
    parts of ``framenumperjob`` molecules.  Each part is uploaded over SFTP
    to the helper node and, for queue type ``'PBS'``, a generated
    ``pbs.run`` script and the control file are uploaded next to it.  The
    remote part directories are then polled every 300 s until each contains
    a ``finished`` marker, after which the part files are downloaded again.
    Otherwise the molecules are mapped over ``calculator`` in a local
    process pool and only results flagged ``True`` are kept.  In both cases
    the EGCM of every resulting molecule is recomputed and the molecules
    replace the contents of the set, which is saved.

    Args:
        MSetname: name of the molecule set to load, process and overwrite.
        manager: object providing ``Pool(n)``; used only for local runs.
        ctrlfile: control file name, relative to the working directory,
            uploaded with each job script and passed to ``Qmcal.py -i``.
    """
    para_path = './'
    if GPARAMS.Compute_setting.Traininglevel == "DFTB+":
        # os.environ only accepts str values.
        os.environ["OMP_NUM_THREADS"] = str(
            GPARAMS.Compute_setting.Ncoresperthreads)
        para_path = GPARAMS.Software_setting.Dftbparapath
    input_path = './' + GPARAMS.Compute_setting.Traininglevel + '/Consumer/'
    if not os.path.exists(input_path):
        os.makedirs(input_path, exist_ok=True)
    TMPSet = MSet(MSetname)
    TMPSet.Load()
    mols = TMPSet.mols
    print('Nmols in Newaddedset:', len(mols))

    if GPARAMS.Train_setting.Ifwithhelp == True:
        helper = GPARAMS.Train_setting
        chunk = helper.framenumperjob
        nstage = math.ceil(len(mols) / chunk)
        print(nstage)
        partnames = [MSetname + '_part%d' % i for i in range(nstage)]
        remotedirs = [
            helper.helpcpupath + '/' + MSetname + '/part%d' % i
            for i in range(nstage)
        ]
        subMSetresult = [False] * nstage
        for i, partname in enumerate(partnames):
            part = MSet(partname)
            part.mols = mols[i * chunk:(i + 1) * chunk]
            part.Save()

        workpath = os.getcwd()
        print(workpath)
        localpdbs = [
            workpath + '/datasets/%s.pdb' % partname for partname in partnames
        ]
        finishmols = []
        trans = pko.Transport((helper.helpcpunodeip, helper.helpcpuport))
        sftp = None
        try:
            trans.connect(username=helper.helpcpuaccount,
                          password=helper.helpcpupasswd)
            # The SSH client reuses the already authenticated transport.
            ssh = pko.SSHClient()
            ssh._transport = trans
            sftp = pko.SFTPClient.from_transport(trans)

            # --- upload every part and its job script --------------------
            for i in range(nstage):
                remotepath = remotedirs[i]
                srcpath = localpdbs[i]
                print(" Put pdb file:")
                print(remotepath, srcpath)
                stdin, stdout, stderr = ssh.exec_command(
                    'mkdir -p %s/datasets' % remotepath)
                # read() also waits for mkdir to finish before the upload.
                print(stdout.read().decode())
                sftp.put(srcpath,
                         remotepath + '/datasets/%s.pdb' % partnames[i])
                if helper.queuetype == 'PBS':
                    with open('pbs.run', 'w') as pbsrun:
                        pbsrun.write(
                            pbsstr %
                            (GPARAMS.Compute_setting.Ncoresperthreads,
                             GPARAMS.Compute_setting.Traininglevel +
                             "_%d" % i))
                        pbsrun.write(helper.helpcpuenv)
                        pbsrun.write(
                            "python -u Qmcal.py -i %s -d %s> %s.qmout\n" %
                            (ctrlfile, partnames[i], partnames[i]))
                        pbsrun.write("rm *.chk\n")
                        pbsrun.write("touch finished\n")
                    sftp.put(localpath=workpath + '/pbs.run',
                             remotepath=remotepath + '/pbs.run')
                    sftp.put(localpath=workpath + '/' + ctrlfile,
                             remotepath=remotepath + '/' + ctrlfile)
                    # NOTE(review): nothing here submits pbs.run (no qsub
                    # call is issued) -- presumably the jobs are started by
                    # other means; confirm.

            # --- wait for the "finished" marker of every part ------------
            t = 0
            while False in subMSetresult:
                time.sleep(300)
                t += 300
                for i in range(nstage):
                    if subMSetresult[i]:
                        continue
                    # The marker is created by "touch finished" in pbs.run.
                    stdin, stdout, stderr = ssh.exec_command(
                        "cd %s && ls finished" % remotedirs[i])
                    if 'finished' in stdout.read().decode():
                        subMSetresult[i] = True
                print(t, subMSetresult)

            # --- download the processed parts ----------------------------
            for i in range(nstage):
                srcpath = localpdbs[i]
                try:
                    os.remove(srcpath)
                except FileNotFoundError:
                    pass
                # NOTE(review): the upload went to <remote>/datasets/ while
                # this reads from <remote>/ -- verify where Qmcal.py writes
                # its result.
                sftp.get(localpath=srcpath,
                         remotepath=remotedirs[i] + '/' + MSetname +
                         '_part%d.pdb' % i)
                part = MSet(partnames[i])
                part.Load()
                finishmols += part.mols
        finally:
            if sftp is not None:
                sftp.close()
            trans.close()

        for mol in finishmols:
            mol.Cal_EGCM()
        TMPSet.mols = finishmols
        TMPSet.Save()
    else:
        nmols = len(mols)
        # One argument tuple per molecule, in the order used below:
        # (mol, input path, parameter path, keywords, cores, atomization).
        inputlist = list(
            zip(mols,
                [input_path] * nmols,
                [para_path] * nmols,
                [GPARAMS.Compute_setting.Gaussiankeywords] * nmols,
                [GPARAMS.Compute_setting.Ncoresperthreads] * nmols,
                [GPARAMS.Compute_setting.Atomizationlevel] * nmols))
        paracal_pool = manager.Pool(GPARAMS.Compute_setting.Consumerprocessnum)
        results = paracal_pool.map(calculator, inputlist)
        paracal_pool.close()
        paracal_pool.join()
        mollist = []
        for result in results:
            # result[0] is the success flag, result[1] the molecule.
            if result[0] == True:
                result[1].Cal_EGCM()
                mollist.append(result[1])
        TMPSet.mols = mollist
        TMPSet.Save()
    return
Exemplo n.º 4
0
def esoinner(MSetname=''):
    """Refit the ESOINN model on a molecule set and prune small clusters.

    Steps performed by the code below:

    1. Remember the cluster centres of the current model (``None`` when the
       model has no nodes yet).
    2. Load the set ``MSetname`` (or the set named by
       ``GPARAMS.Compute_setting.Traininglevel`` when it is empty), make
       sure every molecule has an ``EGCM`` attribute and save the set.
    3. Initialise ``scalemax`` / ``scalemin`` from the data when they are
       unset and pickle them to ``Sfactor.in``; min-max scale the data and
       zero non-finite entries.
    4. Fit the model on the first output of ``predict`` (or on all data for
       an empty model), then remove every cluster that holds no more than
       20 % of the samples.
    5. For an already trained model, map each new cluster centre to the
       index of the closest previous centre (``updaterule``).

    Args:
        MSetname: name of the molecule set to use; empty selects the
            training-level set.
    """
    from ..Comparm import GPARAMS 
    if_continue=True
    if len(GPARAMS.Esoinn_setting.Model.nodes)!=0: 
        cluster_center_before=GPARAMS.Esoinn_setting.Model.cal_cluster_center()
    else:
        cluster_center_before=None 
    if MSetname:
        TotalMSet=MSet(MSetname)
    else:
        TotalMSet=MSet(GPARAMS.Compute_setting.Traininglevel)
    TotalMSet.Load()
    for i in TotalMSet.mols:
        # Compute the descriptor only for molecules that lack it.
        if not hasattr(i,'EGCM'):
            i.Cal_EGCM()
    TotalMSet.Save()
    Dataset=np.array([i.EGCM for i in TotalMSet.mols])

    try:
        scale_missing=not (GPARAMS.Esoinn_setting.scalemax and GPARAMS.Esoinn_setting.scalemin)
    except ValueError:
        # A multi-element numpy array has no single truth value.  The
        # factors are then arrays already, so initialisation is skipped
        # (the previous bare "except: pass" had the same effect).
        scale_missing=False
    if scale_missing:
        print("++++++++++++++++++++++++++++++++++++++++++++++++")
        print("initialize the Scalefactor!!!")
        print("++++++++++++++++++++++++++++++++++++++++++++++++")
        try:
            GPARAMS.Esoinn_setting.scalemax=np.max(Dataset,0)
            GPARAMS.Esoinn_setting.scalemin=np.min(Dataset,0)
            with open("Sfactor.in",'wb') as f:
                pickle.dump((GPARAMS.Esoinn_setting.scalemax,GPARAMS.Esoinn_setting.scalemin),f)
        except Exception as err:
            # Still best effort, but the failure is now reported instead of
            # being silently dropped.
            print("initialize the Scalefactor failed:",err)

    Dataset=(Dataset-GPARAMS.Esoinn_setting.scalemin)/(GPARAMS.Esoinn_setting.scalemax-GPARAMS.Esoinn_setting.scalemin)
    # A zero range produces inf/nan; those entries are set to 0.
    Dataset[~np.isfinite(Dataset)]=0
    if len(GPARAMS.Esoinn_setting.Model.nodes)!=0:
        Noiseset,a,b,c,d=GPARAMS.Esoinn_setting.Model.predict(Dataset)
    else:
        Noiseset=Dataset 

    GPARAMS.Esoinn_setting.Model.fit(Noiseset,iteration_times=GPARAMS.Train_setting.Esoistep,if_reset=False)
    GPARAMS.Esoinn_setting.Model.Save()
    Noiseset,Noiseindex,nodelabel,cluster_label,signalmask=GPARAMS.Esoinn_setting.Model.predict(Dataset)
    signal_cluster=[[] for i in range(GPARAMS.Esoinn_setting.Model.class_id)]

    for i in range(len(Dataset)):
        signal_cluster[cluster_label[i][0]].append(Dataset[i])
    signal_num_list=[len(i) for i in signal_cluster]
    # Clusters need more than 20 % of all samples to survive the cut.
    judgenum=math.ceil(sum(signal_num_list)*0.2)
    print ("signal_num_list:",signal_num_list,"judgenum",judgenum)

    removecluster=[i for i in range(len(signal_num_list)) if not(signal_num_list[i] > judgenum)]
    print ("removeclusteid:",removecluster)

    GPARAMS.Esoinn_setting.Model.cut_cluster(removecluster)
    GPARAMS.Esoinn_setting.Model.Save()
    print (GPARAMS.Esoinn_setting.Model.Name,GPARAMS.Esoinn_setting.Model.class_id)  
    print("Class id after Cut action:",GPARAMS.Esoinn_setting.Model.class_id)

    cluster_center_after=GPARAMS.Esoinn_setting.Model.cal_cluster_center()
    # Identity test: "!= None" on a numpy array is evaluated element-wise
    # and cannot be used as an "if" condition.
    if cluster_center_before is not None:# and GPARAMS.Esoinn_setting.NNdict["NN"]!=None:
        print ("Update HDNN")
        updaterule=np.zeros(GPARAMS.Esoinn_setting.Model.class_id)
        for i in range(len(cluster_center_after)):
            vec1=cluster_center_after[i]
            # Squared Euclidean distance to every previous centre.
            dis=np.sum((np.array(cluster_center_before)-np.array([vec1]*len(cluster_center_before)))**2,1) 
            index=np.argmin(dis)
            print (i,index,"+++++++++++++++++++++++++++")
            updaterule[i]=index 
        """
Exemplo n.º 5
0
        os.system("cp *.ESOINN Sfactor.in ./networks/lastsave ")
    UpdateGPARAMS(jsonfile)
    for i in GPARAMS.Compute_setting.Gpulist:
        GPUQueue.put(i)

    bigset=MSet('Bigset')
    GPARAMS.Dataset_setting.Inputdatasetlist=random.sample(GPARAMS.Dataset_setting.Inputdatasetlist,8)
    for name in GPARAMS.Dataset_setting.Inputdatasetlist:
        tmpset=MSet(name)
        tmpset.Load()
        bigset.mols+=tmpset.mols
    for i in range(GPARAMS.Compute_setting.Checkernum):
        checker_set=MSet('Bigset_%d'%i)
        checker_set.mols=[bigset.mols[j] for j in range(len(bigset.mols)) if j%(i+1)==0]
        checker_set.mols=[bigset.mols[0]]+random.sample(checker_set.mols,min(GPARAMS.Compute_setting.Checkerstep,len(checker_set.mols)))
        checker_set.Save()
    bigset=None 
    for stage in range(GPARAMS.Train_setting.Trainstage,\
                       GPARAMS.Train_setting.Stagenum+GPARAMS.Train_setting.Trainstage):
        LoadModel()
        #==Main MD process with productor and Consumer model==
        ProductPool=Pool(len(GPARAMS.Compute_setting.Gpulist))
        Resultlist=[]
        for i in range(len(GPARAMS.System_setting)):
            result=ProductPool.apply_async(productor,(i,QMQueue,GPUQueue))
            Resultlist.append(result)
        for i in range(GPARAMS.Compute_setting.Checkernum):
            result=ProductPool.apply_async(checker,(i,QMQueue,GPUQueue))
            Resultlist.append(result)
        ProductPool.close()
        for i in range(len(GPARAMS.System_setting)):
Exemplo n.º 6
0
def parallel_caljob(MSetname, manager, ctrlfile):
    """Run the QM calculation for every molecule of the set ``MSetname``.

    With ``GPARAMS.Train_setting.Ifcpuwithhelp`` set, the set is split into
    parts of ``dftpercpu`` molecules (at least ``framenumperjob`` each).
    Every part is uploaded over SFTP to the helper node together with a
    generated ``cpu.run`` script and the control file, and submitted with
    ``qsub`` (PBS) or ``bsub`` (LSF).  The remote part directories are
    polled every 30 s; the job script maintains the marker files ``queue``,
    ``started`` and ``finished`` there.  A PBS job that is still marked
    ``queue`` but whose id is missing from ``qstat`` is submitted again.
    When all parts are finished they are downloaded.  Otherwise the
    molecules are mapped over ``calculator`` in a local process pool and
    only results flagged ``True`` are kept.  In both cases the EGCM of every
    resulting molecule is recomputed and the molecules replace the contents
    of the set, which is saved.

    Args:
        MSetname: name of the molecule set to load, process and overwrite.
        manager: object providing ``Pool(n)``; used only for local runs.
        ctrlfile: control file name, relative to the working directory,
            uploaded with each job script and passed to ``Qmcal.py -i``.
    """
    para_path = './'
    if GPARAMS.Compute_setting.Traininglevel == "DFTB+":
        # os.environ only accepts str values; the core count is numeric
        # (it is divided further below).
        os.environ["OMP_NUM_THREADS"] = str(
            GPARAMS.Compute_setting.Ncoresperthreads)
        para_path = GPARAMS.Software_setting.Dftbparapath
    input_path = './' + GPARAMS.Compute_setting.Traininglevel + '/Consumer/'
    if not os.path.exists(input_path):
        os.makedirs(input_path, exist_ok=True)
    TMPSet = MSet(MSetname)
    TMPSet.Load()
    mols = TMPSet.mols
    print('Nmols in Newaddedset:', len(mols))

    if GPARAMS.Train_setting.Ifcpuwithhelp == True:
        helper = GPARAMS.Train_setting
        helper_addr = (helper.helpcpunodeip, helper.helpcpuport)
        is_pbs = helper.cpuqueuetype == 'PBS'
        is_lsf = helper.cpuqueuetype == 'LSF'

        dftpercpu = max(math.ceil(len(mols) / helper.helpcpunum),
                        helper.framenumperjob)
        nstage = math.ceil(len(mols) / dftpercpu)
        print(nstage)
        partnames = [MSetname + '_part%d' % i for i in range(nstage)]
        remotedirs = [
            helper.helpcpupath + '/' + MSetname + '/part%d' % i
            for i in range(nstage)
        ]
        subMSetresult = [False] * nstage
        for i, partname in enumerate(partnames):
            part = MSet(partname)
            # Slice with the same chunk size nstage was derived from;
            # slicing with framenumperjob dropped every molecule beyond
            # nstage * framenumperjob.
            part.mols = mols[i * dftpercpu:(i + 1) * dftpercpu]
            part.Save()

        # Keep retrying until the helper node accepts the connection.
        connect_num = 0
        while True:
            trans = None
            try:
                trans = pko.Transport(helper_addr)
                trans.connect(username=helper.helpcpuaccount,
                              password=helper.helpcpupasswd)
                break
            except Exception as e:
                print(e)
                if trans is not None:
                    trans.close()
                connect_num += 1
                print(f"{connect_num} th reconnect to {helper_addr} for {MSetname}")
                time.sleep(5)

        workpath = os.getcwd()
        print(workpath)
        localpdbs = [
            workpath + '/datasets/%s.pdb' % partname for partname in partnames
        ]
        finishmols = []
        sftp = None
        try:
            # The SSH client reuses the already authenticated transport.
            ssh = pko.SSHClient()
            ssh._transport = trans
            sftp = pko.SFTPClient.from_transport(trans)

            # --- upload and submit every part -----------------------------
            jobidlist = []
            for i in range(nstage):
                remotepath = remotedirs[i]
                srcpath = localpdbs[i]
                jobname = GPARAMS.Compute_setting.Traininglevel + "_%d" % i
                print(" Put pdb file:")
                print(remotepath, srcpath)
                stdin, stdout, stderr = ssh.exec_command(
                    'mkdir -p %s/datasets' % remotepath)
                # read() also waits for mkdir to finish before the upload.
                print(stdout.read().decode())
                sftp.put(srcpath,
                         remotepath + '/datasets/%s.pdb' % partnames[i])
                with open('cpu.run', 'w') as cpurun:
                    if is_pbs:
                        cpurun.write(pbscpustr % (
                            GPARAMS.Compute_setting.Ncoresperthreads, jobname))
                    elif is_lsf:
                        cpurun.write(lsfcpustr % (
                            GPARAMS.Compute_setting.Ncoresperthreads, jobname))
                    cpurun.write(helper.helpcpuenv)
                    # Marker files: "queue" -> "started" -> "finished".
                    cpurun.write("rm queue\n")
                    cpurun.write('touch started\n')
                    cpurun.write(
                        "python -u $ESOIHOME/bin/Qmcal.py -i %s -d %s> %s.qmout\n"
                        % (ctrlfile, partnames[i], partnames[i]))
                    cpurun.write("rm *.chk started\n")
                    cpurun.write("touch finished\n")
                sftp.put(localpath=workpath + '/cpu.run',
                         remotepath=remotepath + '/cpu.run')
                sftp.put(localpath=workpath + '/' + ctrlfile,
                         remotepath=remotepath + '/' + ctrlfile)
                jobid = None
                if is_pbs:
                    stdin, stdout, stderr = ssh.exec_command(
                        'cd %s &&touch queue &&qsub cpu.run' % remotepath)
                    jobid = stdout.read().decode().strip()
                    print(jobid)
                elif is_lsf:
                    stdin, stdout, stderr = ssh.exec_command(
                        'cd %s && bsub <cpu.run' % remotepath)
                    print(stdout.read().decode(), stderr.read().decode())
                # One entry per part keeps jobidlist aligned with the part
                # index for every queue type.
                jobidlist.append(jobid)

            # --- poll the marker files ------------------------------------
            t = 0
            while False in subMSetresult:
                time.sleep(30)
                t += 30
                for i in range(nstage):
                    if subMSetresult[i]:
                        continue
                    remotepath = remotedirs[i]
                    stdin, stdout, stderr = ssh.exec_command(
                        "cd %s && ls " % (remotepath))
                    tmpstr = stdout.read().decode()
                    if 'finished' in tmpstr:
                        subMSetresult[i] = True
                        continue
                    still_queued = ('started' not in tmpstr
                                    and 'queue' in tmpstr)
                    # qstat/qsub are only used for jobs submitted via PBS.
                    if is_pbs and still_queued:
                        stdin, stdout, stderr = ssh.exec_command('qstat')
                        if jobidlist[i] not in stdout.read().decode():
                            # Marked as queued but unknown to the scheduler:
                            # submit it again.
                            stdin, stdout, stderr = ssh.exec_command(
                                'cd %s && qsub cpu.run' % remotepath)
                            jobidlist[i] = stdout.read().decode().strip()
                print(t, subMSetresult)

            # --- download the processed parts -----------------------------
            for i in range(nstage):
                srcpath = localpdbs[i]
                try:
                    os.remove(srcpath)
                except FileNotFoundError:
                    pass
                sftp.get(localpath=srcpath,
                         remotepath=remotedirs[i] + '/datasets/' + MSetname +
                         '_part%d.pdb' % i)
                part = MSet(partnames[i])
                part.Load()
                finishmols += part.mols
        finally:
            if sftp is not None:
                sftp.close()
            trans.close()

        for mol in finishmols:
            mol.Cal_EGCM()
        TMPSet.mols = finishmols
        TMPSet.Save()
    else:
        nmols = len(mols)
        # The cores are shared between the pool's worker processes.
        cores = math.ceil(GPARAMS.Compute_setting.Ncoresperthreads /
                          GPARAMS.Compute_setting.Consumerprocessnum)
        # One argument tuple per molecule, in the order used below:
        # (mol, input path, parameter path, keywords, cores, atomization).
        inputlist = list(
            zip(mols,
                [input_path] * nmols,
                [para_path] * nmols,
                [GPARAMS.Compute_setting.Gaussiankeywords] * nmols,
                [cores] * nmols,
                [GPARAMS.Compute_setting.Atomizationlevel] * nmols))
        paracal_pool = manager.Pool(GPARAMS.Compute_setting.Consumerprocessnum)
        results = paracal_pool.map(calculator, inputlist)
        paracal_pool.close()
        paracal_pool.join()
        mollist = []
        for result in results:
            # result[0] is the success flag, result[1] the molecule.
            if result[0] == True:
                result[1].Cal_EGCM()
                mollist.append(result[1])
        TMPSet.mols = mollist
        TMPSet.Save()
        print("HHHHHHHHHHHHHHHHHHHH")
        print("HHHHHHHHHHHHHHHHHHHH")
        print(len(TMPSet.mols))
        print("HHHHHHHHHHHHHHHHHHHH")
        print("HHHHHHHHHHHHHHHHHHHH")
    return
Exemplo n.º 7
0
def consumer(Queue):
    """Collect queued molecules into the stage's "Newadded" set and save it.

    Batches are read from ``Queue`` until a ``None`` sentinel arrives.
    Every batch entry is indexed as ``entry[0]`` (a molecule, which is
    renamed) and ``entry[1]`` (its error value).  After draining the queue
    the molecules are ordered by decreasing error and filtered through
    ``Check_MSet``.  When the set is larger than
    ``samplebasenum * (number of systems + Checkernum)`` it is reduced to at
    most that size: by label (edge / noise / normal) when the ESOINN model
    has more classes than ``Modelnumperpoint``, by plain random sampling
    otherwise.  The set is saved at the end.

    Args:
        Queue: queue-like object; only its blocking ``get`` method is used.
    """
    import time
    from ..Base import Molnew
    import os
    from TensorMol import MSet 
    print ("Consumer start")
    Newaddedset=MSet('Stage_%d_Newadded'%GPARAMS.Train_setting.Trainstage)
    num=0
    Error_list=[]
    while True:
        ERROR_mols=Queue.get()
        # Identity test: "== None" would be evaluated element-wise (and then
        # fail inside "if") should a batch ever be a numpy array.
        if ERROR_mols is None:
            break
        for i,entry in enumerate(ERROR_mols):
            entry[0].name="Stage_%d_Mol_%d_%d"%(GPARAMS.Train_setting.Trainstage,num,i)
            Error_list.append(entry[1])
            Newaddedset.mols.append(entry[0])
        num+=1
        # Periodic checkpoint of what has been collected so far.
        if num%2000==0:
            Newaddedset.Save() 
    # argsort of the negated errors puts the largest error first.
    Error_list=-np.array(Error_list)
    Newaddedset.mols=[Newaddedset.mols[i] for i in np.argsort(Error_list)]

    Newaddedset.mols=Check_MSet(Newaddedset.mols)
    sysnum=(len(GPARAMS.System_setting)+GPARAMS.Compute_setting.Checkernum)
    samplelimit=GPARAMS.Compute_setting.samplebasenum*sysnum
    if len(GPARAMS.Esoinn_setting.Model.nodes)!=0 and GPARAMS.Esoinn_setting.Model.class_id > GPARAMS.Train_setting.Modelnumperpoint:
        Dataset=[]
        for mol in Newaddedset.mols:
            try:
                Dataset.append(mol.EGCM)
            except AttributeError:
                # No cached descriptor on this molecule yet.
                Dataset.append(mol.Cal_EGCM())
        # predict() returns five values; only the last one (one label per
        # molecule) is used here.
        _,_,_,_,signalmask=GPARAMS.Esoinn_setting.Model.predict(Dataset)
        normalmollist=[];edgemollist=[];noisemollist=[]   
        for mol,label in zip(Newaddedset.mols,signalmask):
            if label=='Noise':
                noisemollist.append(mol)
            elif label=='Edge':
                edgemollist.append(mol)
            elif label=='Normal':
                normalmollist.append(mol)
        print ("Select Newadded set:",len(noisemollist),len(edgemollist),len(normalmollist))
        if len(Newaddedset.mols)>samplelimit:
            normalnumpersys=math.ceil(GPARAMS.Compute_setting.samplebasenum*0.3)
            edgenumpersys=math.ceil(GPARAMS.Compute_setting.samplebasenum*0.3)
            # The lists are ordered by decreasing error, so the slices keep
            # the highest-error molecules of each label.
            edgemollist=edgemollist[:edgenumpersys*sysnum]
            normalmollist=normalmollist[:normalnumpersys*sysnum]
            # Noise molecules fill whatever is left of the budget.  Because
            # the per-label quotas are rounded up the remainder can become
            # negative, hence the clamp.
            noisemolnum=max(0,samplelimit-len(normalmollist)-len(edgemollist))
            noisepool=noisemollist[:noisemolnum*5]
            # random.sample raises ValueError when asked for more items than
            # the pool holds.
            noisemollist=random.sample(noisepool,min(noisemolnum,len(noisepool)))
            Newaddedset.mols=edgemollist+noisemollist+normalmollist  
        print ("After selecting Newadded set:",len(noisemollist),len(edgemollist),len(normalmollist))
    else:
        print ("================================")
        print ("samplebasnum&sysnum",GPARAMS.Compute_setting.samplebasenum,sysnum)
        print ("================================")
        if len(Newaddedset.mols)>samplelimit:
            Newaddedset.mols=random.sample(Newaddedset.mols,samplelimit)
    Newaddedset.Save()
    return
Exemplo n.º 8
0
from ESOI_HDNN_MD.Train import consumer
from ESOI_HDNN_MD.Computemethod import Cal_NN_EFQ
import os
#from TensorMol import *
from TensorMol import MSet, JOULEPERHARTREE
import argparse as arg
from multiprocessing import Queue, Process, Manager, Pool
from matplotlib import pyplot as plt
# Command line: a single option -i/--input whose value is handed to
# UpdateGPARAMS below.
_DESCRIPTION = (
    'Grep qm area from an Amber MDcrd trajory to make training dataset!')
parser = arg.ArgumentParser(description=_DESCRIPTION)
parser.add_argument('-i', '--input')

args = parser.parse_args()
jsonfile = args.input
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
if __name__ == "__main__":
    UpdateGPARAMS(jsonfile)
    LoadModel()
    if not os.path.exists("./results"):
        os.system("mkdir ./results")
    # Recompute the EGCM of every molecule in each input dataset and write
    # the dataset back.
    for dataset_name in GPARAMS.Dataset_setting.Inputdatasetlist:
        TMPSet = MSet(dataset_name)
        TMPSet.Load()
        for j, mol in enumerate(TMPSet.mols):
            # Progress output once per 1000 molecules.
            if j % 1000 == 0:
                print(TMPSet.Name, j)
            EGCM = mol.Cal_EGCM()
        TMPSet.Save()