def save_task(filename, commit):
    """Store a copy of a task file together with the commit it belongs to.

    A new numbered directory is created below the task directory
    (``exp_common.TASK_DIR`` under the repository root), ``filename`` is
    copied into it, and a dict holding ``commit`` and ``filename`` is written
    (as its ``repr``) to ``exp_common.TASK_COMMIT_FILE`` in that directory.

    Args:
        filename: path of the task file to copy.
        commit: commit identifier recorded next to the copy.

    Returns:
        The integer id of the newly created task directory.

    Exits the process with status 1 if the task directory cannot be created.
    """
    rootdir = util.abs_root_path()
    taskdir = os.path.join(rootdir, exp_common.TASK_DIR)

    task_id = 1
    if not os.path.isdir(taskdir):
        # NOTE(review): DOT_DIR is used as given (relative to the current
        # directory unless it is absolute) while taskdir is rooted at
        # rootdir -- confirm this is intended.
        if not os.path.isdir(exp_common.DOT_DIR):
            os.mkdir(exp_common.DOT_DIR)
        os.mkdir(taskdir)
    else:
        # Only purely numeric entries are task ids; stray files are ignored,
        # and an existing-but-empty directory starts again at id 1 instead of
        # crashing in max().
        existing_ids = [int(x) for x in os.listdir(taskdir) if x.isdigit()]
        if existing_ids:
            task_id = max(existing_ids) + 1

    task_path = os.path.join(taskdir, str(task_id))
    try:
        os.mkdir(task_path)
    except OSError:
        print('Could not create task directory. Aborting')
        exit(1)

    shutil.copy(filename, task_path)

    task_namespace = {'commit': commit, 'filename': filename}
    commit_file = os.path.join(task_path, exp_common.TASK_COMMIT_FILE)
    # Same output as the two-argument print statement: fields joined by a space.
    print(" ".join(["task file is", commit_file]))
    with open(commit_file, 'w') as f:
        f.write(repr(task_namespace))
        f.write('\n')
    return task_id
def load_info(hsh):
    """Load info about an experiment as saved by save_descr.

    Args:
        hsh: experiment hash; names the experiment's directory below
            ``exp_common.RESULTS_DIR`` under the repository root.

    Returns:
        The object obtained by evaluating the description file, or ``None``
        when the file cannot be opened.
    """
    descr_path = os.path.join(util.abs_root_path(), exp_common.RESULTS_DIR,
                              hsh, exp_common.DESCR_FILE)
    try:
        f = open(descr_path)
    except IOError:
        return None
    # Close the handle deterministically instead of leaking it.
    with f:
        # NOTE(review): eval() executes whatever the description file
        # contains; only safe while these files are written by this tool.
        return eval(f.read())
def load_task(task_id):
    """Read back the file name and commit recorded for a saved task.

    Args:
        task_id: id of the task; names the task's directory below
            ``exp_common.TASK_DIR`` under the repository root.

    Returns:
        A ``(filename, commit)`` tuple taken from the task's commit file.

    Exits the process with status 1 if the commit file cannot be opened.
    """
    rootdir = util.abs_root_path()
    taskdir = os.path.join(rootdir, exp_common.TASK_DIR)
    taskfilename = os.path.join(taskdir, str(task_id),
                                exp_common.TASK_COMMIT_FILE)
    try:
        f = open(taskfilename)
    except IOError:
        print('Could not access task file {}'.format(taskfilename))
        exit(1)
    # Close the handle once the contents have been read.
    with f:
        # NOTE(review): eval() executes whatever the task file contains;
        # only safe while these files are written by this tool.
        task_namespace = eval(f.read())
    return (task_namespace['filename'], task_namespace['commit'])
def expander(m):
    """Substitution callback: turn one matched placeholder into its text.

    ``m`` is a regex match whose group 1 holds the placeholder body. Relies on
    ``params``, ``used_params``, ``parent_nodes`` and ``deps`` from the
    surrounding scope, and on the module-level ``all_nodes`` cache.

    * empty body: returns ``'{}'`` unchanged (handled in a later pass);
    * body starting with ``':'``: marks the named parameter as used and
      returns its value as a string;
    * anything else: treated as an experiment description (optionally
      followed by ``:p1,p2,...``, expanded to ``p1=<value>,...``), looked up
      with ``find``; the first match's hash is appended to ``deps`` and the
      path of its results directory is returned.
    """
    global all_nodes

    token = m.group(1)

    if token == '':
        # let this be handled in the next pass
        return '{}'

    if token[0] == ':':
        # parameter substitution
        # uh oh error checking...
        name = token[1:]
        used_params[name] = True
        return str(params[name])

    # in-description parameter substitution
    if ':' in token:
        d, names = token.split(':', 1)
        names = names.split(',')
        # more param type mess
        d += ':' + ','.join(p + '=' + str(params[p]) for p in names)
        for p in names:
            used_params[p] = True
    else:
        d = token

    matched_exps = find(d, parent_nodes)
    # If something is not matched, this just gives up. Should we allow the
    # user to create dependencies on the fly?
    if len(matched_exps) == 0:
        print('Warning: could not match %s in the dependency. Did you specify it as a dependency?' % token)
        var = raw_input('Do you want me to check all older experiments? y/n')
        if var == 'y':
            # Load every stored experiment once and reuse it afterwards.
            if all_nodes is None:
                all_nodes = read_descrs()
                # have_loaded_all = True
            matched_exps = find(d, all_nodes)
            if len(matched_exps) == 0:
                print('Error: Could not match %s. Aborting.' % token)
                exit(1)
        else:
            print('Aborting.')
            exit(1)

    chosen = matched_exps[0]
    if len(matched_exps) > 1:
        print('Warning: found multiple matches for %s' % token)
        print('Using latest (%s)' % time.ctime(chosen['date']))
    deps.append(chosen.hsh)
    return os.path.join(util.abs_root_path(), RESULTS_DIR, chosen.hsh)
def purge(args):
    """Delete the results directories of the experiments matching ``args.exp``.

    Reads ``args.exp`` (passed to ``find``), ``args.all`` (required when more
    than one experiment matches) and ``args.dry_run`` (only print what would
    be purged). Failures to remove a directory are reported and skipped.
    """
    matches = find(args.exp)
    ambiguous = len(matches) > 1 and not args.all
    if ambiguous:
        print('Multiple matching experiments; use --all to purge them all')
        return

    # NOTE(review): sibling code uses RESULTS_DIR; confirm RESULTS_PATH is
    # the intended constant here.
    resultsdir = os.path.join(util.abs_root_path(), RESULTS_PATH)
    for exp in matches:
        print('Purging {} ({})'.format(exp['description'], exp.hsh))
        if args.dry_run:
            continue
        target = os.path.join(resultsdir, exp.hsh)
        try:
            shutil.rmtree(target)
        except Exception as e:
            # Same output as the two-argument print statement: fields joined
            # by a single space.
            print(' '.join(['Could not remove directory: ', str(e)]))
def setup_env(self):
    """Prepare the on-disk environment for this experiment.

    Creates the experiments directory and this experiment's results
    directory if missing, saves the description/info with ``save_descr``,
    recreates ``self.expdir`` from scratch and extracts the code of
    ``self.info['commit']`` into it via ``git archive | tar``.

    Side effect: changes the process working directory to the repository
    root. Exits the process with status 1 if the experiment directory cannot
    be created or the checkout fails.
    """
    # Create experiments directory if it doesn't exist
    exp_root = os.path.join(self.rootdir, exp_common.EXP_DIR)
    if not os.path.isdir(exp_root):
        os.makedirs(exp_root)

    # Make the results directory for this experiment
    if not os.path.isdir(self.exp_results):
        os.makedirs(self.exp_results)

    # Save the description and info
    save_descr(os.path.join(self.exp_results, exp_common.DESCR_FILE),
               self.info)

    # Make the experiment directories and checkout code. Do it
    # here so that you fail in the root node of the cluster, if
    # you fail
    if os.path.isdir(self.expdir):
        shutil.rmtree(self.expdir)
    try:
        os.mkdir(self.expdir)
    except OSError:
        print('Experimental directory could not be created or already exists.')
        print('Aborting.')
        exit(1)

    if self.subdir_only:
        # The working directory is stored on the instance by job_init; the
        # bare name `working_dir` is not defined in this method.
        checkout_dir = self.working_dir
    else:
        checkout_dir = '.'

    # checkout the appropriate commit
    # can do this with git --work-tree=... checkout commit -- ., but
    # cannot do concurrently, so use git archive...
    # ... whose behavior seems to depend on current directory
    rootdir = util.abs_root_path()
    os.chdir(rootdir)
    sts = util.exec_shell('git archive {} {} | tar xC {}'
                          .format(self.info['commit'], checkout_dir,
                                  self.expdir))
    if sts != 0:
        print('Attempt to checkout experimental code failed')
        exit(1)
def read_descrs(keep_unreadable=False, keep_unfinished=False,
                keep_failed=False, keep_broken_deps=False):
    """Collect a ``dag.dag_node`` for every entry of the results directory.

    A node is kept when it succeeded, or failed while ``keep_failed`` is set,
    or whenever ``keep_unfinished`` is set; nodes reporting broken
    dependencies are additionally dropped unless ``keep_broken_deps`` is set.
    ``keep_unreadable`` is accepted but not used by this function. A missing
    or unreadable results directory yields an empty list.

    Returns:
        The list of kept nodes, in ``os.listdir`` order.
    """
    resultsdir = os.path.join(util.abs_root_path(), RESULTS_DIR)
    try:
        entries = os.listdir(resultsdir)
    except OSError:
        entries = []

    kept = []
    for entry in entries:
        node = dag.dag_node(hsh=entry)
        state_ok = (node.success()
                    or (node.failure() and keep_failed)
                    or keep_unfinished)
        if not state_ok:
            continue
        if not keep_broken_deps and node.broken_deps():
            continue
        kept.append(node)

    sys.stderr.write('Finished reading descriptions...\n')
    return kept
def job_init(self):
    """Resolve this job's hash, directories, expanded command/code and info.

    When ``self.hsh`` is ``None`` the command (or, if ``self.code`` is set,
    the code) is expanded with ``exp_common.expand_command``, the hash is
    computed from commit, working directory and the expanded text, and any
    previously saved info for that hash is loaded. When ``self.hsh`` is
    given, the saved info must exist and the final command/code, deps and
    description are restored from it.

    If no saved info exists a fresh info dict is built; otherwise the saved
    one is kept and, when ``self.rerun`` is set, reset to the initial run
    state and its results directory removed.

    Exits the process with status 1 if ``self.hsh`` was given but its info
    cannot be loaded.
    """
    # A bunch of directories we will need later on
    rootdir = util.abs_root_path()
    self.rootdir = rootdir
    self.working_dir = os.path.relpath(os.getcwd(), rootdir)
    self.resultsdir = os.path.join(rootdir, exp_common.RESULTS_DIR)

    if self.hsh is None:
        if self.code is None:
            # Creating the new command
            self.new_cmd, deps = exp_common.expand_command(
                self.command, self.params, self.parents)
            # NOTE: the hash inputs are left exactly as they were so that
            # hashes of existing experiments stay valid.
            self.hsh = util.sha1(self.commit + str(len(self.working_dir)) +
                                 self.working_dir + str(len(self.command)) +
                                 self.new_cmd)
            self.exp_results = os.path.join(self.resultsdir, self.hsh)
            self.expdir = os.path.join(rootdir, exp_common.EXP_DIR, self.hsh)
            self.new_cmd = self.new_cmd.replace('{}', self.exp_results)
            self.new_code = None
        else:
            # terrible terrible hack to prevent parameter
            # substitution for macros (since this syntax
            # interferes with Python list syntax). TODO: figure
            # out whether this is actually a good idea (hint: no).
            code = self.code.replace("[", "<---")
            code = code.replace("]", "--->")
            new_code, deps = exp_common.expand_command(
                code, self.params, self.parents)
            new_code = new_code.replace("<---", "[")
            self.new_code = new_code.replace("--->", "]")
            # The deps returned by expand_command are replaced by the
            # parents' hashes here (kept as-is: they feed the hash below).
            deps = [x.hsh for x in self.parents]
            self.hsh = util.sha1(self.commit + str(len(self.working_dir)) +
                                 self.working_dir + str(len(self.code)) +
                                 self.new_code + repr(deps))
            self.exp_results = os.path.join(self.resultsdir, self.hsh)
            self.expdir = os.path.join(rootdir, exp_common.EXP_DIR, self.hsh)
            self.new_code = self.new_code.replace('{}', self.exp_results)
            self.new_cmd = None
        # try to read run info from disk
        self.info = load_info(self.hsh)
    else:
        self.info = load_info(self.hsh)
        if self.info is None:
            print("Error: could not load experiment %s." % (self.hsh))
            exit(1)
        self.new_cmd = self.info['final_command']
        # Restore the code into new_code; assigning it to new_cmd (as before)
        # overwrote the command and left new_code unset.
        self.new_code = self.info['final_code']
        self.deps = self.info['deps']  # exp_common.expand_command(self.info["command"], self.info["params"], self.deps())
        self.desc = self.info['description']
        self.exp_results = os.path.join(self.resultsdir, self.hsh)
        self.expdir = os.path.join(rootdir, exp_common.EXP_DIR, self.hsh)

    # if not found, initialize from scratch
    if self.info is None:
        self.info = dict()
        self.info['description'] = self.desc  # description (string)
        self.info['working_dir'] = self.working_dir
        # TODO: figure out how to handle these implicit dependencies
        # The dependencies will be filled in later once the parents are
        # finished. See setup_env.
        self.info['deps'] = set([x.hsh for x in self.parents] + deps)
        self.info['command'] = self.command  # command to run (string)
        self.info['code'] = self.code  # code to execute
        self.info['commit'] = self.commit  # commit hash (string)
        self.info['date'] = time.time()
        self.info['params'] = self.params  # parameters to pass (dictionary)
        self.info['run_state'] = RUN_STATE_VIRGIN
        self.info['return_code'] = None
        self.info['final_command'] = self.new_cmd
        self.info['final_code'] = self.new_code
    else:
        if self.info['description'] != self.desc:
            print("Warning: job description '%s' differs from "
                  "saved description '%s'; using '%s'"
                  % (self.desc, self.info['description'],
                     self.info['description']))
        if self.rerun == True:
            # Reset the saved state and drop the old results so the job
            # starts over.
            self.info['run_state'] = RUN_STATE_VIRGIN
            self.info['return_code'] = None
            self.info['date'] = time.time()
            shutil.rmtree(self.exp_results)
    self.jobid = None