def model_experiments_finished(job: signac.Project.Job, key="succeeded"):
    if not model_experiments_needed(job):
        return False
    for featureJob, splitJob, feature_graph_name, feature_graph_files in feature_split_iter(job):
        if re.search(get_exp_regex(job), feature_graph_name) is None:
            continue
        elif splitJob not in utils.signac_tools.getSplitProject(featureJob).find_jobs(
                task_args.split_filter, task_args.split_doc_filter):
            continue
        md5_str = "_".join(map(lambda x: calculate_md5(splitJob.fn(x)),
                               feature_graph_files))
        exp_args_list = task_args.model_args or splitJob.doc.get(expCode, default=[])
        if exp_args_list == [] and is_tuning():
            exp_args_list = [""]
        for args in exp_args_list:
            if task_args.arg_regex is not None and re.search(task_args.arg_regex, args) is None:
                continue
            dataset_dir = splitJob.workspace()
            datasetDirObj = Path(dataset_dir)
            # Workspace path
            workspaceDirObj = datasetDirObj / workspaceRoot  # type: Path
            if not (workspaceDirObj.exists() and workspaceDirObj.is_dir()):
                return False
            modelProject = signac.init_project(
                name=expProjectName, root=str(workspaceDirObj))
            run_id = "{}@{}".format(args, md5_str)
            if is_tuning():
                run_id += "[tuning]"
            if not any(map(lambda job_i: job_i.doc.get(key, False),
                           modelProject.find_jobs(filter={"run_id": run_id}))):
                return False
    return True
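
# NOTE: calculate_md5 is defined elsewhere in this repository; the functions
# in this module only assume it maps a file path to the hex digest of that
# file's contents. A minimal sketch of the assumed behavior:
import hashlib

def _calculate_md5_sketch(path, chunk_size=65536):
    # Hash the file in chunks so large graph/feature files stay memory-bound.
    digest = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()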
def export_dataset(job: signac.Project.Job):
    for featureJob, splitJob, feature_graph_name, feature_graph_files in feature_split_iter(job):
        exp_regex = get_exp_regex(job)
        if re.search(exp_regex, feature_graph_name) is None:
            continue
        elif splitJob not in utils.signac_tools.getSplitProject(featureJob).find_jobs(
                task_args.split_filter, task_args.split_doc_filter):
            continue
        elif is_tuning() and (splitJob.sp.get("split_index", None) not in {None, 0}):
            continue
        dataset_dir = Path(splitJob.workspace())
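
# The split selection above uses signac's Project.find_jobs(filter, doc_filter):
# both arguments are plain dicts matched against each split job's state point
# and job document, respectively. Illustrative only (the key/value pairs here
# are assumptions, not values taken from this project):
#
#   splitProject = utils.signac_tools.getSplitProject(featureJob)
#   splitProject.find_jobs({"split_index": 0}, {"succeeded": True})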
def clear_workspace(job: signac.Project.Job):
    for featureJob, splitJob, feature_graph_name, feature_graph_files in feature_split_iter(job):
        exp_regex = get_exp_regex(job)
        if re.search(exp_regex, feature_graph_name) is None:
            print("[run_model@{}] Regex {} not matching; skip on dataset {}".format(
                job.get_id(), exp_regex, feature_graph_name))
            continue
        elif splitJob not in utils.signac_tools.getSplitProject(featureJob).find_jobs(
                task_args.split_filter, task_args.split_doc_filter):
            print("[run_model@{}] Filter {} not matching; skip on dataset {}".format(
                job.get_id(), (task_args.split_filter, task_args.split_doc_filter),
                feature_graph_name))
            continue
        dataset_dir = splitJob.workspace()
        datasetDirObj = Path(dataset_dir)
        # Workspace path
        workspaceDirObj = datasetDirObj / workspaceRoot  # type: Path
        if workspaceDirObj.exists():
            assert workspaceDirObj.is_dir()
            if task_args.model_args:
                try:
                    modelProject = signac.get_project(
                        root=str(workspaceDirObj), search=False)
                    md5_str = "_".join(map(lambda x: calculate_md5(splitJob.fn(x)),
                                           feature_graph_files))
                    for args in task_args.model_args:
                        if task_args.arg_regex is not None and re.search(
                                task_args.arg_regex, args) is None:
                            print("[run_model@{}] Regex {} not matching; skip on args {}".format(
                                job.get_id(), task_args.arg_regex, args))
                            continue
                        run_id = "{}@{}".format(args, md5_str)
                        for model_job in modelProject.find_jobs(filter={"run_id": run_id}):
                            print("Removing folder {}".format(model_job.workspace()))
                            shutil.rmtree(model_job.workspace())
                except LookupError:
                    # signac raises LookupError when no project has been
                    # initialized under workspaceDirObj: nothing to clear.
                    pass
            else:
                print("Removing folder {}".format(workspaceDirObj))
                shutil.rmtree(str(workspaceDirObj))
def clean_workspace(job: signac.Project.Job):
    for _, splitJob, feature_graph_name, feature_graph_files in feature_split_iter(job):
        exp_regex = get_exp_regex(job)
        if re.search(exp_regex, feature_graph_name) is None:
            print("[run_model@{}] Regex {} not matching; skip on dataset {}".format(
                job.get_id(), exp_regex, feature_graph_name))
            continue
        dataset_dir = splitJob.workspace()
        datasetDirObj = Path(dataset_dir)
        if all(map(splitJob.isfile, feature_graph_files)):
            md5_str = "_".join(map(lambda x: calculate_md5(splitJob.fn(x)),
                                   feature_graph_files))
        else:
            md5_str = None
            print(f"[clean_workspace@{job.get_id()}] Missing files for split {feature_graph_name}")
        # Workspace path
        workspaceDirObj = datasetDirObj / workspaceRoot  # type: Path
        if workspaceDirObj.exists():
            assert workspaceDirObj.is_dir()
            try:
                modelProject = signac.get_project(root=str(workspaceDirObj), search=False)
                for model_job in modelProject:
                    if not model_job.doc.get("succeeded", False):
                        target_dir = model_job.workspace()
                        print(f"[clean_workspace@{job.get_id()}] Experiment not succeeded: removing folder {target_dir}")
                        shutil.rmtree(target_dir)
                    elif (md5_str is not None) and (not model_job.sp.run_id.endswith(md5_str)):
                        target_dir = model_job.workspace()
                        print(f"[clean_workspace@{job.get_id()}] Experiment not matching current data: removing folder {target_dir}")
                        shutil.rmtree(target_dir)
            except LookupError:
                pass
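
# Hypothetical driver sketch (not part of the original module): apply
# clean_workspace to every job in the graph project. Assumes the script is
# run from the project root so signac.get_project() can locate it.
def _clean_all_sketch():
    for graph_job in signac.get_project():
        clean_workspace(graph_job)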
def generate_csv(job: signac.Project.Job, args):
    textBuffer = StringIO()
    textList = []
    args.csv_data_dict["numClass"] = job.sp.numClass
    try:
        args.csv_data_dict["h"] = "{:.2f}".format(job.sp.h)
    except AttributeError:
        args.csv_data_dict["h"] = job.sp.HName
    args.csv_data_dict["Graph ID"] = job.get_id()
    args.csv_data_dict["Clustering Coefficient"] = job.doc.get("avgClusteringCoeff")
    for featureJob, splitJob, feature_graph_name, feature_graph_files in feature_split_iter(job):
        feature_file = featureJob.doc.get("feature_file")
        if featureJob.doc.get("feature_name"):
            args.csv_data_dict["Feature"] = featureJob.doc["feature_name"]
        else:
            args.csv_data_dict["Feature"] = Path(
                feature_file.replace(job.sp.graphName + "-", "")).stem
        args.csv_data_dict["Graph Name"] = feature_graph_name
        args.csv_data_dict["Split Config"] = splitJob.sp.split_config
        md5_str = "_".join(map(lambda x: calculate_md5(splitJob.fn(x)),
                               feature_graph_files))
        dataset_dir = splitJob.workspace()
        datasetDirObj = Path(dataset_dir)
        # Workspace path
        workspaceDirObj = datasetDirObj / args.workspaceRoot
        try:
            gcnProject = signac.get_project(root=str(workspaceDirObj), search=False)
        except LookupError as e:
            print(e, file=sys.stderr)
            continue
        if args.exp_args is not None:
            exp_arg_list = args.exp_args
        elif args.add_args:
            exp_arg_list = list(set(splitJob.doc.get(args.exp_type, default=[]))
                                | set(args.add_args))
        else:
            exp_arg_list = splitJob.doc.get(args.exp_type, default=[])
        for exp_args in exp_arg_list:
            args.csv_data_dict["Model Args"] = '"{}"'.format(exp_args)
            run_id = "{}@{}".format(exp_args, md5_str)
            job_iter = gcnProject.find_jobs(filter={"run_id": run_id})
            if any(map(lambda job_i: job_i.doc.get("succeeded", False), job_iter)):
                assert len(job_iter) == 1, (args.csv_data_dict, run_id)
                # Parse experiment results
                for job_m in job_iter:
                    args.csv_data_dict["Experiment ID"] = job_m.get_id()
                    args.result_parser(job_m, args)
                    if args.path_only:
                        path = [job.get_id(), featureJob.get_id(), splitJob.get_id(),
                                "/", args.workspaceRoot, job_m.get_id()]
                        args.csv_data_dict["Job Path"] = json.dumps(path)
                    assert len(args.csv_data_dict) == len(args.csv_header_list)
                    # Write to text buffer
                    textBuffer.write(",".join(map(str, args.csv_data_dict.values())) + "\n")
                    textList.append(list(map(str, args.csv_data_dict.values())))
    if not args.path_only:
        # Write to the result file
        if not args.csv_file_generated:
            print(f"CSV will be saved to {args.output}")
            with open(args.output, "w") as csv_out:
                csv_out.write(",".join(args.csv_header_list) + "\n")
                csv_out.write(textBuffer.getvalue())
            args.csv_file_generated = True
        else:
            with open(args.output, "a") as csv_out:
                csv_out.write(textBuffer.getvalue())
    else:
        # Write to stdout as CSV
        csv_writer = csv.writer(sys.stdout)
        if not args.csv_file_generated:
            csv_writer.writerow(args.csv_header_list)
            csv_writer.writerows(textList)
            args.csv_file_generated = True
        else:
            csv_writer.writerows(textList)
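
# generate_csv delegates metric extraction to args.result_parser(job_m, args),
# which is expected to fill the remaining columns of args.csv_data_dict from
# the model job's document. A minimal sketch of such a callback (the
# "test_accuracy" key is an assumption; real parsers read whatever the model
# script records):
def _example_result_parser(model_job, args):
    args.csv_data_dict["Test Accuracy"] = model_job.doc.get("test_accuracy", "")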
def run_model(job: signac.Project.Job):
    logger = logging.getLogger('run_model@{}'.format(job.get_id()))
    logger.setLevel(logging.DEBUG)
    logger.propagate = False
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    chFormatter = logging.Formatter(
        '[{asctime} {name} {levelname:>8}] {message}', '%m-%d %H:%M:%S', '{')
    ch.setFormatter(chFormatter)
    logger.addHandler(ch)
    for featureJob, splitJob, feature_graph_name, feature_graph_files in feature_split_iter(job):
        exp_regex = get_exp_regex(job)
        if re.search(exp_regex, feature_graph_name) is None:
            print("[run_model@{}] Regex {} not matching; skip on dataset {}".format(
                job.get_id(), exp_regex, feature_graph_name))
            continue
        elif splitJob not in utils.signac_tools.getSplitProject(featureJob).find_jobs(
                task_args.split_filter, task_args.split_doc_filter):
            print("[run_model@{}] Filter {} not matching; skip on dataset {}".format(
                job.get_id(), (task_args.split_filter, task_args.split_doc_filter),
                feature_graph_name))
            continue
        elif is_tuning() and (splitJob.sp.get("split_index", None) not in {None, 0}):
            print("[run_model@{}] Split index is not 0 for tuning; skip on dataset {}".format(
                job.get_id(), feature_graph_name))
            continue
        md5_str = "_".join(map(lambda x: calculate_md5(splitJob.fn(x)),
                               feature_graph_files))
        dataset_dir = splitJob.workspace()
        datasetDirObj = Path(dataset_dir)
        # Workspace path
        workspaceDirObj = datasetDirObj / workspaceRoot
        workspaceDirObj.mkdir(exist_ok=True, parents=True)
        modelProject = signac.init_project(
            name=expProjectName, root=str(workspaceDirObj))
        fh = logging.FileHandler(str(workspaceDirObj / "terminal_output.log"), "a")
        fh.setLevel(logging.DEBUG)
        fhFormatter = logging.Formatter(
            '[{asctime} {levelname:>8}] {message}', '%m-%d %H:%M:%S', '{')
        fh.setFormatter(fhFormatter)
        logger.addHandler(fh)
        exp_args_list = task_args.model_args or splitJob.doc.get(expCode, default=[])
        if exp_args_list == [] and is_tuning():
            exp_args_list = [""]
        for args in exp_args_list:
            if task_args.arg_regex is not None and re.search(task_args.arg_regex, args) is None:
                print("[run_model@{}] Regex {} not matching; skip on args {}".format(
                    job.get_id(), task_args.arg_regex, args))
                continue
            run_id = "{}@{}".format(args, md5_str)
            if is_tuning():
                run_id += "[tuning]"
                logger.removeHandler(fh)
            if any(map(lambda job_i: job_i.doc.get("succeeded", False),
                       modelProject.find_jobs(filter={"run_id": run_id}))):
                print("[run_model@{}] Already run; skip on dataset {} for parameter {}".format(
                    job.get_id(), feature_graph_name, args))
            else:
                # Construct arguments
                args_split = args.split()
                dataset_args = dataset_args_func(
                    dataset_dir=dataset_dir,
                    feature_graph_name=feature_graph_name,
                    run_id=run_id,
                    workspaceDirObj=workspaceDirObj,
                    task_args=task_args,
                    featureJob=featureJob,
                    args=args,
                    args_split=args_split,
                    splitJob=splitJob
                )
                if dataset_args is None:
                    raise ValueError("dataset_args_func is not properly configured.")
                elif dataset_args is False:
                    print("[run_model@{}] Skip on dataset {} for parameter {}".format(
                        job.get_id(), feature_graph_name, args))
                    continue
                arg_list = [get_python_path(), "-u", modelScript] + \
                    dataset_args + args_split
                # Run model code
                print("[run_model@{}] run on dataset {} for parameter {}".format(
                    job.get_id(), feature_graph_name, args))
                try:
                    logger.info(
                        "===============\n>>>>Executing command {}\n===============".format(arg_list))
                    if not (job.doc.get("exp_terminal", False) or flags.log_to_terminal):
                        ch.setLevel(logging.WARNING)
                        ch.setFormatter(chFormatter)
                    if task_args.interactive:
                        proc = subprocess.Popen(arg_list, cwd=str(modelPathObj))
                    else:
                        proc = subprocess.Popen(arg_list, cwd=str(modelPathObj),
                                                stdout=subprocess.PIPE,
                                                stderr=subprocess.STDOUT,
                                                encoding='utf-8')
                    if proc.stdout is not None:
                        msgcount = 0
                        for line in iter(proc.stdout.readline, ''):
                            msgcount += 1
                            logger.info(line.strip())
                            if msgcount % 100 == 0:
                                logger.debug("running on dataset {} for parameter {}".format(
                                    feature_graph_name, args))
                                msgcount = 0
                    returncode = proc.wait()
                    if returncode != 0:
                        raise subprocess.CalledProcessError(returncode, arg_list)
                    else:
                        logger.debug("Completed on dataset {} for parameter {}".format(
                            feature_graph_name, args))
                except subprocess.CalledProcessError:
                    logger.error("Check log at {}".format(
                        workspaceDirObj / "terminal_output.log"))
                    raise
                logger.info("===============")
                ch.setLevel(logging.INFO)
                # Tag job as succeeded (except when tuning)
                assert len(modelProject.find_jobs(filter={"run_id": run_id})) == 1
                if not task_args.tuning:
                    for job_m in modelProject.find_jobs(filter={"run_id": run_id}):
                        job_m.doc["succeeded"] = True
                else:
                    print("[run_model@{}] Job will not be tagged successful in tuning mode.".format(
                        job.get_id()))
        logger.removeHandler(fh)
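
# run_model delegates per-run CLI construction to dataset_args_func, whose
# contract is visible above: return a list of extra command-line arguments,
# False to skip this dataset/parameter combination, or None when the task is
# not properly configured. A minimal sketch (the flag names are assumptions,
# not the actual model interface):
def _example_dataset_args_func(dataset_dir, feature_graph_name, run_id,
                               workspaceDirObj, **kwargs):
    return ["--dataset_path", str(dataset_dir),
            "--dataset", feature_graph_name,
            "--run_id", run_id,
            "--out_dir", str(workspaceDirObj)]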