def find_inputs():
    """
    Finds the inputs in subdirs. Verifies the appropriateness of each input
    file.

    Walks the tree below the current directory and treats every file ending
    in .in, .gjf, .com, .inp or .input as a potential input. Each candidate
    is handed to check_input_file(); a truthy result marks it as valid.

    Only the first valid input met in a directory is kept. Further valid
    inputs in the same directory are reported but ignored.

    Returns a dictionary of {dir: [inputfile, job]} where job is the value
    returned by check_input_file() for that file.
    """
    input_exts = ('.in', '.gjf', '.com', '.inp', '.input')

    # Group candidate file names by the directory that contains them.
    filetree = dict()
    for d, _, filenames in os.walk(os.curdir):
        for f in filenames:
            if os.path.splitext(f)[-1] in input_exts:
                filetree.setdefault(d, []).append(f)

    # Count files, not directories: the message talks about inputs.
    logging.debug("{} potential inputs found".format(
        sum(len(names) for names in filetree.values())))

    multidir = list()
    badinput = list()
    inputdirs = dict()
    for key in filetree:
        for v in filetree[key]:
            path = os.path.join(key, v)
            job = check_input_file(path)
            if not job:
                logging.warning("File {} not valid input.".format(str(path)))
                badinput.append(str(path))
            elif key in inputdirs:
                logging.warning("More than one valid input file in {}. "
                                "Please have only one input per "
                                "directory.".format(key))
                # List each offending directory once in the summary, even
                # when it holds three or more valid inputs.
                if key not in multidir:
                    multidir.append(key)
            else:
                inputdirs[key] = [v, job]
        if key not in inputdirs:
            logging.warning(
                "Directory {} does not contain valid input.".format(key))

    if len(badinput) > 0:
        logging.info("Following inputs have errors:\n{}".format(
            turbogo_helpers.list_str(badinput)))
    if len(multidir) > 0:
        logging.info("Following directories have too many inputs:\n{}".format(
            turbogo_helpers.list_str(multidir)))
    return inputdirs
def main():
    """
    First call on the code.

    Sets up logging (everything goes to turbocontrol.log, a filtered copy
    goes to stdout), parses the command line, locates the inputs with
    find_inputs(), builds and submits a Jobset for each of them and then
    hands the submitted jobs to watch_jobs().

    Command line options:
        -v / --verbose  show debugging info on stdout
        -q / --quiet    show only warnings on stdout
        -s / --solvent  print the solvents known to Turbocontrol and exit
    """
    log_format = '%(asctime)s - %(name)s - [%(levelname)s] - %(message)s'
    logging.basicConfig(
        format=log_format,
        filename='turbocontrol.log',
        level=logging.DEBUG)
    logging.info('Started')

    # argparse substitutes %(prog)s; the optparse-style %prog is printed
    # literally in the help text.
    parser = argparse.ArgumentParser(
        prog="TurboControl",
        description="Usage: %(prog)s [options]")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-v', '--verbose', action='store_true',
                       help='Run more verbose (show debugging info)')
    group.add_argument('-q', '--quiet', action="store_true",
                       help='Run less verbose (show only warnings)')
    parser.add_argument('-s', '--solvent', dest="solvent",
                        action="store_true",
                        help='Show solvents known to Turbocontrol')
    args = parser.parse_args()

    if args.solvent:
        # Single-argument print() behaves the same on Python 2 and 3.
        print("\n".join(sorted(turbogo_helpers.DIELECTRICS.keys(),
                               key=lambda s: s.lower())))
        sys.exit()

    # Console handler mirroring the log file at a user-selected level.
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.INFO)
    if args.verbose:
        ch.setLevel(logging.DEBUG)
    elif args.quiet:
        ch.setLevel(logging.WARNING)
        # NOTE(review): the original layout does not make clear whether this
        # call belonged to the --quiet branch. It is kept here because
        # applying it unconditionally would drop the DEBUG records that
        # --verbose asks for - confirm.
        logging.getLogger().setLevel(logging.INFO)
    ch.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(ch)

    start = time()
    inputdirs = find_inputs()

    inputfiles = [str(os.path.join(key, inputdirs[key][0]))
                  for key in inputdirs]
    logging.info("Inputs found at:\n{}".format(
        turbogo_helpers.list_str(sorted(inputfiles))))

    jobs = list()
    for key in inputdirs:
        infile, jobinfo = inputdirs[key][0], inputdirs[key][1]
        job = Jobset(key, infile, jobinfo)
        job.submit()
        job.curstart = time()
        if not args.verbose:
            # Best-effort cleanup: define.log is only kept for debugging
            # and may not exist at all.
            try:
                os.remove(os.path.join(key, 'define.log'))
            except (OSError, IOError):
                pass
        jobs.append(job)

    elapsed = time() - start
    logging.info("Set up and submitted {} jobs in {} seconds.".format(
        len(jobs), elapsed))

    if len(jobs) > 0:
        watch_jobs(jobs)
    else:
        logging.warning("No jobs submitted. Exiting.")
        sys.exit()
def _job_sections(sections):
    """
    Builds the "There are N <label>:" paragraphs of a status report.

    sections is an iterable of (label, items) pairs; pairs whose items list
    is empty are skipped.
    """
    return "".join(
        "There are {} {}:\n{}\n".format(
            len(items), label, turbogo_helpers.list_str(items))
        for label, items in sections if len(items) > 0)


def watch_jobs(jobs):
    """
    Monitors jobs running. If jobs request frequency, then submits to
    frequency calculation.

    jobs is a list of already submitted job objects. Only job.status,
    job.jobid, job.name and job.firstfreq are read here. Jobs whose status
    is not 'Opt Submitted', 'TS Submitted' or 'Freq Submitted' are reported
    as failed to launch and not watched. Exits the program when there is
    nothing to watch.

    The queue is polled every 10 minutes. A job id that is no longer listed
    by turbogo_helpers.get_all_active_jobs() is passed to check_opt() or
    check_freq(), whose returned status decides whether the job moves on to
    the next stage, crashed, got stuck or completed. A summary is logged
    once no watched job is left.
    """
    orunning = list()   # job ids of running optimisations
    frunning = list()   # job ids of running frequency calculations
    crashed = list()
    completed = list()
    failed_submit = list()
    stuck = list()
    jobdict = dict()    # job id -> job object for every watched job
    starttime = time()

    for job in jobs:
        if job.status in ('Opt Submitted', 'TS Submitted'):
            orunning.append(job.jobid)
            jobdict[job.jobid] = job
        elif job.status == 'Freq Submitted':
            frunning.append(job.jobid)
            jobdict[job.jobid] = job
        else:
            failed_submit.append(job.name + ' - ' + job.status)

    logging.info('There are {} jobs being watched.'.format(len(jobdict)))
    if len(failed_submit) > 0:
        logging.warning('There are {} jobs that failed to launch:\n{}'.format(
            len(failed_submit), turbogo_helpers.list_str(failed_submit)))
    if len(jobdict) == 0:
        sys.exit()

    loopcount = 0
    change = False

    #delay to ensures all jobs are in queue, and catch first moment fails
    sleep(60)

    while True:
        alljobs = turbogo_helpers.get_all_active_jobs()
        if len(alljobs) == 0 and (len(orunning) > 0 or len(frunning) > 0):
            #possible fail at getting jobs from queue
            sleep(60)
            alljobs = turbogo_helpers.get_all_active_jobs()
            if len(alljobs) == 0:
                #One more try
                sleep(300)
                alljobs = turbogo_helpers.get_all_active_jobs()

        # Watched ids that the queue no longer lists have stopped running.
        checkojobs = [jobid for jobid in orunning if jobid not in alljobs]
        checkfjobs = [jobid for jobid in frunning if jobid not in alljobs]

        for ojob in checkojobs:
            job = jobdict.pop(ojob)
            orunning.remove(ojob)
            #find out what happened to the job & deal with it
            status = check_opt(job)
            if status == 'freq':
                # job.jobid is read again after check_opt() and used as the
                # id to watch from now on.
                frunning.append(job.jobid)
                jobdict[job.jobid] = job
                logging.debug(
                    "Job {} submitted for freq with jobid {}.".format(
                        job.name, job.jobid))
            elif status == 'fcrashed':
                crashed.append(job.name)
                logging.debug("Job {} crashed starting freq.".format(
                    job.name))
            elif status == 'ocrashed':
                crashed.append(job.name)
                logging.debug("Job {} crashed in opt.".format(job.name))
            else:
                completed.append(job.name)
                write_stats(job)
                logging.debug("Job {} completed opt.".format(job.name))
            change = True

        for fjob in checkfjobs:
            job = jobdict.pop(fjob)
            frunning.remove(fjob)
            #find out what happened to the job and deal with it
            status = check_freq(job)
            if status == 'opt':
                #job was resubmitted with new geometry to avoid saddle point
                orunning.append(job.jobid)
                jobdict[job.jobid] = job
                logging.debug(
                    "Job {} resubmitted for opt with jobid {}.".format(
                        job.name, job.jobid))
            elif status == 'fcrashed':
                crashed.append(job.name)
                logging.debug("Job {} crashed starting freq.".format(
                    job.name))
            elif status == 'ocrashed':
                crashed.append(job.name)
                logging.debug("Job {} crashed restarting opt.".format(
                    job.name))
            elif status == 'same' or status == 'imaginary':
                stuck.append(job.name)
                write_stats(job)
                logging.info(
                    "Job {} stuck on transition state with freq {}.".format(
                        job.name, job.firstfreq))
            elif status == 'ts':
                write_stats(job)
                completed.append(job.name)
                logging.debug("Job {} completed ts.".format(job.name))
            else:
                write_stats(job)
                completed.append(job.name)
                logging.debug("Job {} completed freq.".format(job.name))
            change = True

        if len(orunning) == 0 and len(frunning) == 0:
            #all jobs finished or crashed
            break

        if loopcount % (3*6) == 0 and change:
            #3-Hourly status update if a change happened
            # (18 polls of 10 minutes each)
            logstring = "\n----------------------------------------------" \
                        "------\n"
            logstring += "At {}:\n".format(strftime("%d/%m/%y %H:%M:%S"))
            logstring += _job_sections([
                ("running opt jobs", orunning),
                ("running freq jobs", frunning),
                ("crashed jobs", crashed),
                ("stuck jobs", stuck),
                ("completed jobs", completed),
            ])
            logstring += "-----------------------------------------------" \
                         "-----"
            logging.info(logstring)
            change = False
        loopcount += 1
        sleep(10*60)

    #after job finished/crashed logging
    elapsed = turbogo_helpers.time_readable(time()-starttime)
    # Report the list that is actually filled in while watching.
    logging.warning("{} jobs completed. {} jobs crashed.".format(
        len(completed), len(crashed)))
    logstring = "\n----------------------------------------------------\n"
    logstring += "Completed at {} after {}:\n".format(
        strftime("%d/%m/%y %H:%M:%S"), elapsed)
    logstring += _job_sections([
        ("completed jobs", completed),
        ("stuck jobs", stuck),
        ("crashed jobs", crashed),
        ("jobs that failed to start", failed_submit),
    ])
    logstring += "----------------------------------------------------"
    logging.info(logstring)