示例#1
0
def main():
    
    args = cT.parseCMD()
    
    passwd = cT.Credentials(args.UserName)
    
    # define all data you want to pull from pimc data files IN ORDER.
    fileTypes = ['log','estimator','super','bipart_dens','ntWind']
    colNums = [
            [4,11,12,13,-4], 
            [0,1,2,3], 
            [0,1], 
            [0,1,2] ]
    observables = [
            ['E','Cv1','Cv2','Cv3','N'],
            ['rho_s/rho','Wx^2','Wy^2','Wz^2'],
            ['filmDens','bulkDens'],
            ['W^2','W_{2d}^2','W_{3d}^2'] ]

    # grab types of estimators from above.  Remove log file.
    estimTypes = list(fileTypes)
    if 'log' in estimTypes:
        estimTypes.pop(estimTypes.index('log'))
    
    # create list of reduced file names for combined data.
    fileNames = []
    for f in estimTypes:
        fileNames.append(str('Reduced'+str(f.capitalize())+'Data.dat'))

    print '\nCreating: ',fileNames,'\n'

    # if data doesn't exist, pull it from cluster
    if not args.pulled:

        print 'Pulling data from cluster.'

        # create ssh and sftp instances
        ssh = paramiko.SSHClient() 
        ssh.load_host_keys(os.path.expanduser(
            os.path.join("~", ".ssh", "known_hosts")))
        ssh.connect('bluemoon-user1.uvm.edu', username=args.UserName, 
                password=passwd)
        sftp = ssh.open_sftp()

        # move to desired directory on cluster
        sftp.chdir(args.targetDir)

        # create list of only seed directory names, get rid of other things
        allStuffInDir = sftp.listdir()
        #for s in seedDirs: # laptop didnt like this method sometimes. wtf?
        #    if s[:4] != 'seed':
        #        seedDirs.pop(seedDirs.index(s))
        seedDirs = []
        for s in range(len(allStuffInDir)):
            if 'seed' in allStuffInDir[s]:
                seedDirs.append( allStuffInDir[s] )
        
        # pull all requested file types into organized directories
        for s in seedDirs:
            newName = cT.returnSeedDirName(s)
            if os.path.exists("./"+newName):
                sys.exit(newName+' already exists.')
            os.makedirs("./"+newName)
            os.chdir("./"+newName)
            sftp.chdir('./'+s+'/OUTPUT/')
            
            moose = sftp.listdir()
            for thing in fileTypes:
                for m in moose:
                    if thing in m:
                        sftp.get('./'+m, './'+m)
                print 'pulled ',thing,' files for ',newName

            sftp.chdir('../..')
            os.chdir('..')
        
        # Rename files to have seed number replace first three numbers of pimcID.
        # Optionally delete all seed directories that were pulled from cluster.
        cT.renameFilesInDirecs(args.delDir)
      
        # check for repeated pimcIDs -- broken.
        #cT.repeatCheck()

        # close instances of sftp and ssh
        sftp.close()
        ssh.close()

    # optionally combine all data of the same temperature into one
    # much larger array.
    if args.Crunch:
        print 'Crunching Data'
        cT.crunchData(estimTypes,colNums,observables)

    # optionally make a trimmed version of the data files that
    # makes all arrays the length of the shortest array.
    # NOT NECESSARY BUT SOMETIMES USEFUL.
    if args.trimData:
        print 'Decided to make trimmed data files'
        cT.trimData(fileNames)
示例#2
0
def main():

    args = cT.parseCMD()

    passwd = cT.Credentials(args.UserName)

    # define all data you want to pull from pimc data files IN ORDER.
    fileTypes = ['log', 'estimator', 'super', 'bipart_dens', 'ntWind']
    colNums = [[4, 11, 12, 13, -4], [0, 1, 2, 3], [0, 1], [0, 1, 2]]
    observables = [['E', 'Cv1', 'Cv2', 'Cv3', 'N'],
                   ['rho_s/rho', 'Wx^2', 'Wy^2', 'Wz^2'],
                   ['filmDens', 'bulkDens'], ['W^2', 'W_{2d}^2', 'W_{3d}^2']]

    # grab types of estimators from above.  Remove log file.
    estimTypes = list(fileTypes)
    if 'log' in estimTypes:
        estimTypes.pop(estimTypes.index('log'))

    # create list of reduced file names for combined data.
    fileNames = []
    for f in estimTypes:
        fileNames.append(str('Reduced' + str(f.capitalize()) + 'Data.dat'))

    print '\nCreating: ', fileNames, '\n'

    # if data doesn't exist, pull it from cluster
    if not args.pulled:

        print 'Pulling data from cluster.'

        # create ssh and sftp instances
        ssh = paramiko.SSHClient()
        ssh.load_host_keys(
            os.path.expanduser(os.path.join("~", ".ssh", "known_hosts")))
        ssh.connect('bluemoon-user1.uvm.edu',
                    username=args.UserName,
                    password=passwd)
        sftp = ssh.open_sftp()

        # move to desired directory on cluster
        sftp.chdir(args.targetDir)

        # create list of only seed directory names, get rid of other things
        allStuffInDir = sftp.listdir()
        #for s in seedDirs: # laptop didnt like this method sometimes. wtf?
        #    if s[:4] != 'seed':
        #        seedDirs.pop(seedDirs.index(s))
        seedDirs = []
        for s in range(len(allStuffInDir)):
            if 'seed' in allStuffInDir[s]:
                seedDirs.append(allStuffInDir[s])

        # pull all requested file types into organized directories
        for s in seedDirs:
            newName = cT.returnSeedDirName(s)
            if os.path.exists("./" + newName):
                sys.exit(newName + ' already exists.')
            os.makedirs("./" + newName)
            os.chdir("./" + newName)
            sftp.chdir('./' + s + '/OUTPUT/')

            moose = sftp.listdir()
            for thing in fileTypes:
                for m in moose:
                    if thing in m:
                        sftp.get('./' + m, './' + m)
                print 'pulled ', thing, ' files for ', newName

            sftp.chdir('../..')
            os.chdir('..')

        # Rename files to have seed number replace first three numbers of pimcID.
        # Optionally delete all seed directories that were pulled from cluster.
        cT.renameFilesInDirecs(args.delDir)

        # check for repeated pimcIDs -- broken.
        #cT.repeatCheck()

        # close instances of sftp and ssh
        sftp.close()
        ssh.close()

    # optionally combine all data of the same temperature into one
    # much larger array.
    if args.Crunch:
        print 'Crunching Data'
        cT.crunchData(estimTypes, colNums, observables)

    # optionally make a trimmed version of the data files that
    # makes all arrays the length of the shortest array.
    # NOT NECESSARY BUT SOMETIMES USEFUL.
    if args.trimData:
        print 'Decided to make trimmed data files'
        cT.trimData(fileNames)
示例#3
0
def main():

    # set number of equilibration steps
    equilNum = 1000
    
    # -------------------------------------------------------------------------
    # NOTE: NEW USERS WILL NEED TO CHANGE THESE STRINGS!!
    # full path to gensubmit and submit file must be supplied as below.
    #
    # NOTE:  This way of doing subFilePath has been replaced by keeping submit
    # in the same directory as stateFiles on local machine in the case of
    # submissions from equilibrated states.  See later in this script!
    genSubPath = '/home/max/Documents/Code/PIMC/SCRIPTS/MTG/MTG_CH_gensubmit.py'
    #subFilePath = '/home/max/Documents/Code/PIMC/SCRIPTS/submitscripts/submit'
    
    # commands to add directories to path where blitz, boost, pimc sit.
    # not sure why, but this must be done every time for paramiko.
    expLibs = 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/users/m/t/mtgraves/local/lib ; '
    expPBSstuff = 'export PATH=/opt/pbs/bin/:$PATH ; '
    # -------------------------------------------------------------------------

    # parse cmd line
    args = cT.parseCMD()

    # get username and password for VACC
    passwd = cT.Credentials(args.UserName)

    # create ssh and sftp instances
    ssh = paramiko.SSHClient() 
    ssh.load_host_keys(os.path.expanduser(
        os.path.join("~", ".ssh", "known_hosts")))
    ssh.connect('bluemoon-user2.uvm.edu', username=args.UserName, password=passwd)
    sftp = ssh.open_sftp()

    # Move to desired directory on cluster from home and if it doesn't 
    # exist, then create it.  This is given by the -t cmd line flag.
    try:
        sftp.chdir(args.targetDir)
    except:
        sftp.mkdir(args.targetDir)
        sftp.chdir(args.targetDir)

    # array of seed numbers, between -L and -H flag from cmd line.
    seedNums = np.arange(args.lowSeed,args.highSeed+1)

    # keep track of directory you are in in the terminal
    workingDir = os.getcwd()

    # get list of (g)ce-state files in directory supplied from cmdline.
    os.chdir('./stateFiles')
    stateFileList = sorted(glob.glob('*state*'))

    print 'we have ',len(stateFileList),' different stateFiles'

    # get list of (g)ce-log files also.
    os.chdir('../logFiles')
    logFileList = sorted(glob.glob('*log*'))

    # -----------------------------------------------------------------
    # build list of restart strings from log files
    restartStrList = []
    for logFile in logFileList:
        with open(logFile) as inFile:
            for n, line in enumerate(inFile):
                if n == 2:
                    restartStrList += [line[2:]]
   

    # -----------------------------------------------------------------
    # build list of worm constants and COM update radii
    wormConstList = []
    comUpdateList = []
    for s in restartStrList:
        wormconst = re.findall(r'-C\s\d*\.?\d*(?:e[-+]\d+)?',s)
        comUpdateRad = re.findall(r'-D\s\d*\.?\d*(?:e[-+]\d+)?',s)
        wormConstList += wormconst
        comUpdateList += comUpdateRad


    subNum = 0
    numToRepeat = 99999 # not currently set to start from multiple states

    if numToRepeat*len(stateFileList) < len(seedNums):
        sys.exit('Need to up numToRepeat')

    os.chdir('..')
    
    for nummm, seedNum in enumerate(seedNums):

        # we want to start a few seeds per state.
        if nummm != 0 and nummm%numToRepeat == 0:
            subNum += 1

        print wormConstList[subNum]
        print comUpdateList[subNum]
        print stateFileList[subNum]

        # create seedXXX directory
        seedDirName = cT.returnSeedDirName(int(seedNum))
        sftp.mkdir(seedDirName)
        sftp.chdir('./'+seedDirName)

        # create directory structure inside of each seedXXX direc.
        sftp.mkdir('out')
        sftp.mkdir('OUTPUT')

        # change random number seed in submit file -- this defines hacky.
        # NOTE:  Must have submit script in current working directory.
        subFilePath = os.getcwd()
        if subFilePath[-1]!='/':
            subFilePath +='/'
        subFilePath += args.stateFilesDir+'/../submit'
        
        with open(subFilePath) as inFile, open(subFilePath+'_temp', 'w') as outFile:
            for n, line in enumerate(inFile):
                if n==0:
                    if '-p 000' not in line:
                        sys.exit('Include -p 000 in submit script!')
                outFile.write( re.sub(r'-p 000', r'-p '+str(seedNum), line))


        # run gensubmit to create initial submit script.
        command = ('python '+genSubPath+' '+subFilePath+\
                '_temp --cluster=bluemoon '+'-m '+args.memRequest)
        subprocess.check_call(command, shell=True)
       
        # store submit file name generated from gensubmit
        subFile = glob.glob('*submit-pimc.*')
        if len(subFile) != 1:
            sys.exit('you have more than one submit file here')
        subFile = subFile[0]

        # optionally set up submit script to call an executable called
        # pimcNoSwaps, which must be located on your path on the cluster.
        sF2 = subFile[:-4]+'_temp.pbs'
        numOccur = 0
        stateNum = 0
        with open(subFile) as inFile, open(sF2, 'w') as outFile:
            for n, line in enumerate(inFile):
                
                match = re.search(r'pimc',line)
                if args.noSwaps:
                    outFile.write( re.sub(r'pimc', r'pimcNoSwaps', line))
                else:
                    outFile.write(line)

                if numOccur%2 == 1:
                    stateNum += 1
                
                if match != None:
                    numOccur += 1

        os.rename(sF2,subFile)

        # we check that there is a directory on the cluster called
        # stateFiles that contains exactly the (g)ce-state files
        # that are in the directory supplied from cmdline.
        # This double-storage is set up to make the user think about
        # what they are doing.
        try:
            sftp.chdir('../stateFiles')
            stFs = sorted(sftp.listdir())
            if stateFileList != stFs:
                sys.exit('ERROR: State files in targeted directory on '+
                        'cluster are different than on your machine.')
            sftp.chdir('../'+seedDirName)
        except:
            sys.exit('ERROR: Must have state files in directory called '+
                    'stateFiles in the parent directory of your jobs.')
        
             
        # -----------------------------------------------------------------
        # now go through and replace the worm constants.
        sF2 = subFile[:-4]+'_temp.pbs'
        numOccur = 0
        stateNum = 0
        
        os.chdir(workingDir)
        with open(subFile) as inFile, open(sF2, 'w') as outFile:
            for n, line in enumerate(inFile):
                
                match = re.search(r'-C ',line)
                if match != None:
                    outFile.write( re.sub(r'-C\s\d*\.?\d*(?:e[-+]\d+)?',
                        wormConstList[subNum], line))
                else:
                    outFile.write(line)

                if numOccur%2 == 1:
                    stateNum += 1
                
                if match != None:
                    numOccur += 1

        os.rename(sF2,subFile)
        
        # -----------------------------------------------------------------
        # now go through and replace (or add a new) COM update radii.
        sF2 = subFile[:-4]+'_temp.pbs'
        numOccur = 0
        stateNum = 0
        
        os.chdir(workingDir)
        with open(subFile) as inFile, open(sF2, 'w') as outFile:
            for n, line in enumerate(inFile):
                
                match = re.search(r'pimc -T ',line)
                match2 = re.search(r'pimcNoSwaps -T ',line)
                echoMatch = re.search(r'echo "',line)
                matchD = re.search(r'-D ',line)
                if echoMatch:
                    outFile.write(line)
                else:
                    if match != None:
                        if matchD != None:
                            outFile.write( re.sub(r'-D\s\d*\.?\d*(?:e[-+]\d+)?',
                                comUpdateList[subNum], line))
                        else:
                            outFile.write( re.sub(r'pimc ',
                                r'pimc '+comUpdateList[subNum]+' ', line))
                    elif match2 != None:
                        if matchD != None:
                            outFile.write( re.sub(r'-D\s\d*\.?\d*(?:e[-+]\d+)?',
                                comUpdateList[subNum], line))
                        else:
                            outFile.write( re.sub(r'pimcNoSwaps ',
                                r'pimcNoSwaps '+comUpdateList[subNum]+' ', line))
                    else:
                        outFile.write(line)
                        #try:    
                        #    outFile.write(line[:-1]+' '+comUpdateList[stateNum])
                        #except:
                        #    outFile.write(line)
    
                if numOccur%2 == 1:
                    stateNum += 1
                
                if match != None:
                    numOccur += 1

        os.rename(sF2,subFile)
         
        # -----------------------------------------------------------------
        # now go through and replace the -E #### and -p ### part of the 
        # submitscript with the specified lines.
        sF2 = subFile[:-4]+'_temp.pbs'
        numOccur = 0
        stateNum = 0
        clusterCWDpre = '${PBS_O_WORKDIR}/../stateFiles/'
        
        os.chdir(workingDir)
        with open(subFile) as inFile, open(sF2, 'w') as outFile:
            for n, line in enumerate(inFile):
                
                match = re.search(r'-E\s\d+',line)
                if match != None:
                    outFile.write( re.sub(r'-E\s\d+','-E '+str(equilNum)+
                        ' -s '+clusterCWDpre+stateFileList[subNum], 
                        line))
                else:
                    outFile.write(line)

                if numOccur%2 == 1:
                    stateNum += 1
                
                if match != None:
                    numOccur += 1

        os.rename(sF2,subFile)
        # -----------------------------------------------------------------  
       

        # copy submit file over to bluemoon
        sftp.put('./'+subFile, './'+subFile)
    
        # optionally submit jobs
        if args.submitJobs:

            # build submit command
            submitStuff = 'qsub '+subFile
            changeDir = 'cd '+args.targetDir+'/'+seedDirName+' ; '
            subComm = changeDir+expLibs+expPBSstuff+submitStuff
           
            # submit the command
            stdin, stdout, stderr = ssh.exec_command(subComm)
            
            print 'Output: ',stdout.readlines()
            if stderr.readlines() != []:
                print 'Error: ',stderr.readlines()

        time.sleep(5)
   
        sftp.chdir('..')

    sftp.close()
    ssh.close()
示例#4
0
def main():

    # unique tag for the name of the job to be presented in the scheduler.
    uTag = 'S05-T0.50'

    # set number of equilibration steps
    equilNum = 0

    # set number of bins to try
    binNum = 50000
    
    # -------------------------------------------------------------------------
    # commands to add directories to path where blitz, boost, pimc sit.
    # not sure why, but this must be done every time for paramiko.
    expLibs = 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/users/m/t/mtgraves/local/lib ; '
    expPBSstuff = 'export PATH=/opt/pbs/bin/:$PATH ; '
    # -------------------------------------------------------------------------

    # parse cmd line
    args = cT.parseCMD()

    # get username and password for VACC
    passwd = cT.Credentials(args.UserName)

    # create ssh and sftp instances
    ssh = paramiko.SSHClient() 
    ssh.load_host_keys(os.path.expanduser(
        os.path.join("~", ".ssh", "known_hosts")))
    ssh.connect('bluemoon-user2.uvm.edu', username=args.UserName, password=passwd)
    sftp = ssh.open_sftp()

    # Move to desired directory on cluster.
    sftp.chdir(args.targetDir)

    # array of seed numbers, between -L and -H flag from cmd line.
    seedNums = np.arange(args.lowSeed,args.highSeed+1)

    # run through all seeds.
    for seedNum in seedNums:

        restartStr = ''

        # move to seedXXX direc.
        seedDirName = cT.returnSeedDirName(int(seedNum))
        sftp.chdir('./'+seedDirName)

        # if a resubmit file already exists, delete it.
        if 'resubmit-pimc.pbs' in sftp.listdir():
            sftp.remove('resubmit-pimc.pbs')
        
        # grab log file string from OUTPUT
        sftp.chdir('./OUTPUT')
        
        # get names of log files
        logFileNames = []
        for f in sftp.listdir():
            if '-log-' in f:
                logFileNames.append(f)
        
        # Naturally sort the log files (account for +/- floats in name).
        # NOTE:  This works for gce- files with all the same string
        #   but varying chemical potential.  Any other use must be 
        #   thought about first as this hasn't been tested for other uses!!!
        logFileNames = natsort.natsorted(logFileNames)

        # build array of restart strings from naturally sorted logfiles
        restartStrings = []
        
        oneLessLine = False
        for logFileName in logFileNames:
            with sftp.open(logFileName) as inFile:
                for n,line in enumerate(inFile):
                    # check for whether the restart string in gce-log has been
                    # moved up one line.  We want to write this to the resubmit
                    # script.
                    lineUp = False
                    if n == 1:
                        if len(line) < 10:
                            lineUp = True
                            continue
                        else:
                            restartStrings.append(line[2:-1])
                    if n == 1:
                        # check if log file 2nd line is blank (some are, some aren't...
                        # this is caused by bug in converting trestles (xsede) scripts
                        # over to bluemoon scripts)
                        if line == '':
                            oneLessLine = True
                            continue
                        else:
                            restartStrings.append(line[2:-1])
                    if n == 2:
                        if not lineUp:
                            restartStrings.append(line[2:-1])
                        if not oneLessLine:
                            restartStrings.append(line[2:-1])

        for nr, restartStr in enumerate(restartStrings):
            restartStrings[nr]+=' >> ${PBS_O_WORKDIR}/out/pimc-0.out 2>&1'
            restartStrings[nr] = re.sub(r'-E\s\d+',r'-E '+str(equilNum),restartStrings[nr])
            restartStrings[nr] = re.sub(r'-S\s\d+',r'-S '+str(binNum),restartStrings[nr])
            restartStrings[nr] = re.sub(r'-W\s\d+',r'-W '+str(29),restartStrings[nr])
        
        sftp.chdir('..')
        
        # check if no restart string exists, in this case just skip the seed
        # (hackyyyy -- needs to start it over in an intelligent manner.)
        if restartStrings == []:
            print 'seed number ',str(seedNum),' is empty'
            sftp.chdir('..')
            continue
        else:

            numStr = 0
            reSubName = 'resubmit-pimc.pbs'

            # determine name of the original submit script.
            for f in sftp.listdir():
                if 'submit-pimc' in f:
                    subFileName = f

            # determine if original submit script was from trestles or bluemoon
            tresTOblue = False
            with sftp.open(subFileName) as inFile:
                for n, line in enumerate(inFile):
                    if n == 6:
                        if 'uvm104' in line:
                            tresTOblue = True

            # write resubmit script for the case of original submit script being
            # from trestles.
            if tresTOblue:

                with sftp.open(reSubName,'w') as outFile:

                    # write beginning statements set up with the appropriate torque
                    # parameters.  Also, submit to the tmp directory for faster i/o.
                    outFile.write('#!/bin/bash\n\
                            \n#PBS -S /bin/bash\
                            \n#PBS -l pmem=1gb,pvmem=1gb\
                            \n#PBS -l nodes=1:ppn=1\
                            \n#PBS -l walltime=30:00:00\
                            \n#PBS -N pimc-%s\
                            \n#PBS -V\
                            \n#PBS -j oe\
                            \n#PBS -o out/pimc-${PBS_JOBID}\
                            \n#PBS -m n\n\
                            \nmkdir /tmp/${PBS_JOBID}\
                            \ncd /tmp/${PBS_JOBID}\
                            \nmkdir OUTPUT\
                            \ngzip ${PBS_O_WORKDIR}/OUTPUT/*\
                            \ncp -r ${PBS_O_WORKDIR}/OUTPUT/* ./OUTPUT/\
                            \ngunzip OUTPUT/*\
                            \necho "Starting PBS script submit-pimc.pbs at:`date`" \
                            \necho "  host:       ${PBS_O_HOST}"\
                            \necho "  node:       `cat ${PBS_NODEFILE}`"\
                            \necho "  jobid:      ${PBS_JOBID}"\n\n' % uTag)
                    
                    # write out the submit string appropriate to the job.
                    with sftp.open(subFileName) as inFile:
                        for n, line in enumerate(inFile):
                            if line[:4] == 'pimc':
                                outFile.write(restartStrings[numStr])
                                numStr += 1
                            else:
                                continue
                    
                    # write closing statements to bring data back from the node.
                    outFile.write('\n\ngzip OUTPUT/*\
                            \ncp OUTPUT/* ${PBS_O_WORKDIR}/OUTPUT\
                            \ncd ${PBS_O_WORKDIR}\
                            \ngunzip ${PBS_O_WORKDIR}/OUTPUT\
                            \nrm -r /tmp/${PBS_JOBID}')
            
            # write resubmit script for the case of original submit script being
            # from bluemoon.
            else:
                with sftp.open(subFileName) as inFile, sftp.open(reSubName,'w') as outFile:
                    for n, line in enumerate(inFile):
                        if line[:4] == 'pimc':
                            outFile.write(restartStrings[numStr])
                            numStr += 1
                        elif r'#PBS -N' in line:
                            outFile.write(line[:-2]+uTag+'\n')
                        elif r'mkdir OUTPUT' in line:
                            outFile.write(line)
                            outFile.write('gzip ${PBS_O_WORKDIR}/OUTPUT/*\n')
                            outFile.write('cp -r ${PBS_O_WORKDIR}/OUTPUT/* ./OUTPUT/\n')
                            outFile.write('gunzip OUTPUT/*\n')
                        else:
                            outFile.write(line)

            print restartStrings[0],'\n'

            # my dumb work-around for allowing writing on cluster to finish.
            time.sleep(5)

            # -----------------------------------------------------------------  
            # optionally submit jobs
            if args.submitJobs:

                #if restartStrings == []:
                #    continue
                #else:

                # build submit command
                submitStuff = 'qsub '+reSubName
                changeDir = 'cd '+args.targetDir+'/'+seedDirName+' ; '
                subComm = changeDir+expLibs+expPBSstuff+submitStuff
               
                # submit the command
                stdin, stdout, stderr = ssh.exec_command(subComm)
                
                print 'Output: ',stdout.readlines()
                if stderr.readlines() != []:
                    print 'Error: ',stderr.readlines()

            sftp.chdir('..')