def assertDictSubsetOf(self, subset_dict, containing_dict):
    mm_items = []
    mm_missing = []
    for (key, value) in subset_dict.items():
        if key in containing_dict:
            if value != containing_dict[key]:
                mm_items.append(key)
        else:
            mm_missing.append(key)
    err_items = len(mm_items) > 0
    err_missing = len(mm_missing) > 0
    if err_items or err_missing:
        subset_json = self._dictToPPJSON(subset_dict)
        containing_json = self._dictToPPJSON(containing_dict)
        error_string = "Expected the following:\n"
        error_string += "{}\n\nto be a subset of\n\n{}\n\n".format(subset_json, containing_json)
        if err_items:
            m = ", ".join(map(lambda x: str(x), mm_items))
            error_string += "Field value mismatch at keys: {}\n".format(m)
        if err_missing:
            m = ", ".join(map(lambda x: str(x), mm_missing))
            error_string += "Keys missing from superset: {}\n".format(m)
        self.assertFalse(True, error_string)
def assertDictSubsetOf(self, subset_dict, containing_dict):
    mm_items = []
    mm_missing = []
    for (key, value) in list(subset_dict.items()):
        if key in containing_dict:
            if value != containing_dict[key]:
                mm_items.append(key)
        else:
            mm_missing.append(key)
    err_items = len(mm_items) > 0
    err_missing = len(mm_missing) > 0
    if err_items or err_missing:
        subset_json = self._dictToPPJSON(subset_dict)
        containing_json = self._dictToPPJSON(containing_dict)
        error_string = "Expected the following:\n"
        error_string += "{}\n\nto be a subset of\n\n{}\n\n".format(subset_json, containing_json)
        if err_items:
            m = ", ".join([str(x) for x in mm_items])
            error_string += "Field value mismatch at keys: {}\n".format(m)
        if err_missing:
            m = ", ".join([str(x) for x in mm_missing])
            error_string += "Keys missing from superset: {}\n".format(m)
        self.assertFalse(True, error_string)
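# A minimal usage sketch of the assertDictSubsetOf helper above, assuming the
# test case inherits it from a shared base class. The base class name and the
# dicts here are hypothetical.
class TestDescribeSubset(DXTestCaseWithSubsetAssertions):
    def test_expected_fields_present(self):
        expected = {"class": "file", "state": "closed"}
        actual = {"class": "file", "state": "closed", "size": 1024}
        # Passes: every key in `expected` appears in `actual` with an equal value
        self.assertDictSubsetOf(expected, actual)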
def tearDown(self):
    try:
        dxpy.api.project_destroy(self.project, {"terminateJobs": True})
    except Exception as e:
        print("Failed to remove test project:", str(e))
    if 'DX_PROJECT_CONTEXT_ID' in dxpy.config:
        del dxpy.config['DX_PROJECT_CONTEXT_ID']
    if 'DX_CLI_WD' in dxpy.config:
        del dxpy.config['DX_CLI_WD']
def tearDown(self):
    try:
        subprocess.check_call(u"dx rmproject --yes --quiet {p}".format(p=self.project), shell=True)
    except Exception as e:
        print("Failed to remove test project:", str(e))
    if 'DX_PROJECT_CONTEXT_ID' in os.environ:
        del os.environ['DX_PROJECT_CONTEXT_ID']
    if 'DX_CLI_WD' in os.environ:
        del os.environ['DX_CLI_WD']
def exit_with_error(msg):
    '''
    :param msg: string message to print before exiting

    Print the error message, as well as a blurb on where to find the
    job workspaces
    '''
    msg += '\n'
    msg += 'Local job workspaces can be found in: ' + str(environ.get('DX_TEST_JOB_HOMEDIRS'))
    sys.exit(msg)
def tearDown(self):
    if "DX_USER_CONF_DIR" in os.environ:
        os.environ.pop("DX_USER_CONF_DIR")
    try:
        dxpy.api.project_destroy(self.project, {"terminateJobs": True})
    except Exception as e:
        print("Failed to remove test project:", str(e))
    if 'DX_PROJECT_CONTEXT_ID' in dxpy.config:
        del dxpy.config['DX_PROJECT_CONTEXT_ID']
    if 'DX_CLI_WD' in dxpy.config:
        del dxpy.config['DX_CLI_WD']
def queue_entry_point(function, input_hash, depends_on=[], name=None):
    '''
    :param function: function to run
    :param input_hash: input to new job
    :param depends_on: list of data object IDs and/or job IDs (local or remote)
        to wait for before the job can be run
    :type depends_on: list of strings
    :param name: job name (optional)
    :returns: new local job ID

    This function should only be called by a locally running job, so
    all relevant DX_TEST_* environment variables should be set.

    This function will set up the home directory for the job, add an
    entry in job_outputs.json, and append the job information to the
    job_queue.json file.  (Both files found in $DX_TEST_JOB_HOMEDIRS.)
    '''
    ensure_env_vars()

    all_job_outputs_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json')

    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)
        job_id = 'localjob-' + str(len(all_job_outputs))

    with open(all_job_outputs_path, 'wb') as fd:
        all_job_outputs[job_id] = None
        json.dump(all_job_outputs, fd, indent=4)
        fd.write(b'\n')

    job_homedir = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)
    os.mkdir(job_homedir)

    job_queue_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_queue.json')
    with open(job_queue_path, 'r') as fd:
        job_queue = json.load(fd)
    job_entry = {"id": job_id,
                 "function": function,
                 "input_hash": input_hash,
                 "depends_on": depends_on}
    if name is not None:
        job_entry['name'] = name
    job_queue.append(job_entry)
    with open(job_queue_path, 'wb') as fd:
        json.dump(job_queue, fd, indent=4)
        fd.write(b'\n')

    return job_id
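# A hedged usage sketch of queue_entry_point above, as it might be called from
# inside a locally running entry point. The entry point name "process" and the
# input fields are hypothetical; the return value is a local job ID (e.g.
# "localjob-1") that can be used in depends_on lists and job-based object
# references.
subjob_input = {"chunk_index": 0}
subjob_id = queue_entry_point("process", subjob_input, name="process chunk 0")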
def make_apps(self, num_apps, name_prefix, bill_to=None):
    apps = []
    app_spec = dict(self.base_app_spec)
    for i in range(num_apps):
        app_spec["name"] = name_prefix + "_" + str(i)
        if bill_to is not None:
            app_spec["billTo"] = bill_to
        app_dir = self.write_app_directory("minimal_åpp", json.dumps(app_spec), "code.py")
        app = json.loads(run("dx build --create-app --json " + app_dir))
        apps.append(app)
    return apps
def resolve_job_ref(jbor, job_outputs={}, should_resolve=True):
    '''
    :param jbor: a dict that is a valid job-based object reference
    :type jbor: dict
    :param job_outputs: a dict of finished local jobs to their output hashes
    :type job_outputs: :class:`collections.OrderedDict`
    :returns: the referenced value if present
    :raises: :exc:`Exception` if the job-based object reference cannot be resolved

    TODO: Support metadata references
    '''
    ref_job_id = get_job_from_jbor(jbor)
    ref_job_field = get_field_from_jbor(jbor)
    ref_job_index = get_index_from_jbor(jbor)

    def resolve_from_hash(output_hash):
        if ref_job_index is None:
            return output_hash[ref_job_field]
        else:
            return output_hash[ref_job_field][ref_job_index]

    if is_localjob_id(ref_job_id):
        if job_outputs.get(ref_job_id) is None:
            if should_resolve:
                raise Exception('Job ' + ref_job_id + ' not found in local finished jobs')
            else:
                return jbor
        if ref_job_field not in job_outputs[ref_job_id]:
            raise Exception('Cannot resolve a JBOR with job ID ' + ref_job_id +
                            ' because field "' + ref_job_field + '" was not found in its output')
        return resolve_from_hash(job_outputs[ref_job_id])
    else:
        dxjob = dxpy.DXJob(ref_job_id)
        try:
            dxjob.wait_on_done()
        except Exception as e:
            raise Exception('Could not wait for ' + ref_job_id + ' to finish: ' + str(e))
        job_desc = dxjob.describe()
        if ref_job_field not in job_desc['output']:
            raise Exception('Cannot resolve a JBOR with job ID ' + ref_job_id +
                            ' because field "' + ref_job_field + '" was not found in its output')
        return resolve_from_hash(job_desc['output'])
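# A sketch of the job-based object reference (JBOR) shape that resolve_job_ref
# above is given, assuming the standard DNAnexus link form parsed by the
# get_job_from_jbor/get_field_from_jbor helpers. The job ID, field name, and
# output value here are hypothetical.
jbor = {"$dnanexus_link": {"job": "localjob-0", "field": "counts_file"}}
finished = collections.OrderedDict(
    [("localjob-0", {"counts_file": {"$dnanexus_link": "file-xxxx"}})])
value = resolve_job_ref(jbor, job_outputs=finished)  # -> the "counts_file" output of localjob-0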
def wait_for_depends_on(depends_on, all_job_outputs):
    # Wait for depends_on and any data objects in the input to close
    if len(depends_on) > 0:
        print(fill('Processing dependsOn and any DNAnexus links to closing objects in the input'))
        for an_id in depends_on:
            try:
                print(' Waiting for ' + an_id + '...')
                if an_id.startswith('localjob'):
                    if all_job_outputs.get(an_id) is None:
                        raise Exception('Job ' + an_id + ' could not be found in local finished jobs')
                elif an_id.startswith('job'):
                    dxjob = dxpy.DXJob(an_id)
                    dxjob.wait_on_done()
                else:
                    handler = dxpy.get_handler(an_id)
                    desc = handler.describe()
                    handler._wait_on_close()
            except Exception as e:
                raise Exception('Could not wait for ' + an_id + ': ' + str(e))
def make_apps(self, num_apps, name_prefix, bill_to=None):
    apps = []
    app_spec = {
        "dxapi": "1.0.0",
        "runSpec": {"file": "code.py", "interpreter": "python2.7"},
        "inputSpec": [],
        "outputSpec": [],
        "version": "1.0.0",
    }
    for i in range(num_apps):
        app_spec["name"] = name_prefix + "_" + str(i)
        if bill_to is not None:
            app_spec["billTo"] = bill_to
        app_dir = self.write_app_directory("minimal_åpp", json.dumps(app_spec), "code.py")
        app = json.loads(run("dx build --create-app --json " + app_dir))
        apps.append(app)
    return apps
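# A minimal usage sketch for the make_apps helpers above, from within a test
# method; the count and name prefix are arbitrary, and it is assumed the
# --json output of "dx build" includes an "id" field.
apps = self.make_apps(3, "test_applet_listing")
app_ids = [app["id"] for app in apps]  # IDs of the newly created apps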
def run_one_entry_point(job_id, function, input_hash, run_spec, depends_on, name=None):
    '''
    :param job_id: job ID of the local job to run
    :type job_id: string
    :param function: function to run
    :type function: string
    :param input_hash: input for the job (may include job-based object references)
    :type input_hash: dict
    :param run_spec: run specification from the dxapp.json of the app
    :type run_spec: dict

    Runs the specified entry point and retrieves the job's output,
    updating job_outputs.json (in $DX_TEST_JOB_HOMEDIRS) appropriately.
    '''
    print('======')

    job_homedir = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)

    job_env = environ.copy()
    job_env['HOME'] = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)

    all_job_outputs_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json')

    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)

    if isinstance(name, basestring):
        name += ' (' + job_id + ':' + function + ')'
    else:
        name = job_id + ':' + function
    job_name = BLUE() + BOLD() + name + ENDC()
    print(job_name)

    # Resolve local job-based object references
    try:
        resolve_job_references(input_hash, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ' when resolving input:\n' + fill(str(e)))

    # Get list of non-closed data objects in the input that appear as
    # DNAnexus links; append to depends_on
    if depends_on is None:
        depends_on = []
    get_implicit_depends_on(input_hash, depends_on)

    try:
        wait_for_depends_on(depends_on, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ' when processing depends_on:\n' + fill(str(e)))

    # Save job input to job_input.json
    with open(os.path.join(job_homedir, 'job_input.json'), 'wb') as fd:
        json.dump(input_hash, fd, indent=4)
        fd.write(b'\n')

    print(job_output_to_str(input_hash, title=(BOLD() + 'Input: ' + ENDC()),
                            title_len=len("Input: ")).lstrip())

    if run_spec['interpreter'] == 'bash':
        # Save job input to env vars
        env_path = os.path.join(job_homedir, 'environment')
        with open(env_path, 'w') as fd:
            job_input_file = os.path.join(job_homedir, 'job_input.json')
            var_defs_hash = file_load_utils.gen_bash_vars(job_input_file, job_homedir=job_homedir)
            for key, val in var_defs_hash.iteritems():
                fd.write("{}={}\n".format(key, val))

    print(BOLD() + 'Logs:' + ENDC())
    start_time = datetime.datetime.now()
    if run_spec['interpreter'] == 'bash':
        script = '''
          cd {homedir};
          . {env_path};
          . {code_path};
          if [[ $(type -t {function}) == "function" ]];
          then {function};
          else echo "$0: Global scope execution complete. Not invoking entry point function {function} because it was not found" 1>&2;
          fi'''.format(homedir=pipes.quote(job_homedir),
                       env_path=pipes.quote(os.path.join(job_env['HOME'], 'environment')),
                       code_path=pipes.quote(environ['DX_TEST_CODE_PATH']),
                       function=function)
        invocation_args = ['bash', '-c', '-e'] + (['-x'] if environ.get('DX_TEST_X_FLAG') else []) + [script]
    elif run_spec['interpreter'] == 'python2.7':
        script = '''#!/usr/bin/env python
import os
os.chdir({homedir})

{code}

import dxpy, json
if dxpy.utils.exec_utils.RUN_COUNT == 0:
    dxpy.run()
'''.format(homedir=repr(job_homedir), code=run_spec['code'])

        job_env['DX_TEST_FUNCTION'] = function
        invocation_args = ['python', '-c', script]

    if USING_PYTHON2:
        invocation_args = [arg.encode(sys.stdout.encoding) for arg in invocation_args]
        env = {k: v.encode(sys.stdout.encoding) for k, v in job_env.items()}
    else:
        env = job_env

    fn_process = subprocess.Popen(invocation_args, env=env)
    fn_process.communicate()
    end_time = datetime.datetime.now()

    if fn_process.returncode != 0:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ', exited with error code ' +
                        str(fn_process.returncode) + ' after ' + str(end_time - start_time))

    # Now updating job output aggregation file with job's output
    job_output_path = os.path.join(job_env['HOME'], 'job_output.json')
    if os.path.exists(job_output_path):
        try:
            with open(job_output_path, 'r') as fd:
                job_output = json.load(fd, object_pairs_hook=collections.OrderedDict)
        except Exception as e:
            exit_with_error('Error: Could not load output of ' + job_name + ':\n' +
                            fill(str(e.__class__) + ': ' + str(e)))
    else:
        job_output = {}

    print(job_name + ' -> ' + GREEN() + 'finished running' + ENDC() + ' after ' + str(end_time - start_time))
    print(job_output_to_str(job_output, title=(BOLD() + "Output: " + ENDC()),
                            title_len=len("Output: ")).lstrip())

    with open(os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json'), 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)
    all_job_outputs[job_id] = job_output

    # Before dumping, see if any new jbors should be resolved now
    for other_job_id in all_job_outputs:
        if all_job_outputs[other_job_id] is None:
            # Skip if job is not done yet (true for ancestor jobs)
            continue
        resolve_job_references(all_job_outputs[other_job_id], all_job_outputs, should_resolve=False)

    with open(os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json'), 'wb') as fd:
        json.dump(all_job_outputs, fd, indent=4)
        fd.write(b'\n')
def run_one_entry_point(job_id, function, input_hash, run_spec, depends_on, name=None):
    '''
    :param job_id: job ID of the local job to run
    :type job_id: string
    :param function: function to run
    :type function: string
    :param input_hash: input for the job (may include job-based object references)
    :type input_hash: dict
    :param run_spec: run specification from the dxapp.json of the app
    :type run_spec: dict

    Runs the specified entry point and retrieves the job's output,
    updating job_outputs.json (in $DX_TEST_JOB_HOMEDIRS) appropriately.
    '''
    print('======')

    job_homedir = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)

    job_env = environ.copy()
    job_env['HOME'] = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)

    all_job_outputs_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json')

    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)

    if isinstance(name, basestring):
        name += ' (' + job_id + ':' + function + ')'
    else:
        name = job_id + ':' + function
    job_name = BLUE() + BOLD() + name + ENDC()
    print(job_name)

    # Resolve local job-based object references
    try:
        resolve_job_references(input_hash, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ' when resolving input:\n' + fill(str(e)))

    # Get list of non-closed data objects in the input that appear as
    # DNAnexus links; append to depends_on
    if depends_on is None:
        depends_on = []
    get_implicit_depends_on(input_hash, depends_on)

    try:
        wait_for_depends_on(depends_on, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ' when processing depends_on:\n' + fill(str(e)))

    # Save job input to job_input.json
    with open(os.path.join(job_homedir, 'job_input.json'), 'wb') as fd:
        json.dump(input_hash, fd, indent=4)
        fd.write(b'\n')

    print(job_output_to_str(input_hash, title=(BOLD() + 'Input: ' + ENDC()),
                            title_len=len("Input: ")).lstrip())

    if run_spec['interpreter'] == 'bash':
        # Save job input to env vars
        env_path = os.path.join(job_homedir, 'environment')
        with open(env_path, 'w') as fd:
            # Following code is what is used to generate env vars on the remote worker
            fd.write("\n".join(["export {k}=( {vlist} )".format(
                                    k=k,
                                    vlist=" ".join([pipes.quote(vitem if isinstance(vitem, basestring) else json.dumps(vitem))
                                                    for vitem in v]))
                                if isinstance(v, list)
                                else "export {k}={v}".format(
                                    k=k,
                                    v=pipes.quote(v if isinstance(v, basestring) else json.dumps(v)))
                                for k, v in input_hash.items()]))

    print(BOLD() + 'Logs:' + ENDC())
    start_time = datetime.datetime.now()
    if run_spec['interpreter'] == 'bash':
        script = '''
          cd {homedir};
          . {env_path};
          . {code_path};
          if [[ $(type -t {function}) == "function" ]];
          then {function};
          else echo "$0: Global scope execution complete. Not invoking entry point function {function} because it was not found" 1>&2;
          fi'''.format(homedir=pipes.quote(job_homedir),
                       env_path=pipes.quote(os.path.join(job_env['HOME'], 'environment')),
                       code_path=pipes.quote(environ['DX_TEST_CODE_PATH']),
                       function=function)
        invocation_args = ['bash', '-c', '-e'] + (['-x'] if environ.get('DX_TEST_X_FLAG') else []) + [script]
    elif run_spec['interpreter'] == 'python2.7':
        script = '''#!/usr/bin/env python
import os
os.chdir({homedir})

{code}

import dxpy, json
if dxpy.utils.exec_utils.RUN_COUNT == 0:
    dxpy.run()
'''.format(homedir=repr(job_homedir), code=run_spec['code'])

        job_env['DX_TEST_FUNCTION'] = function
        invocation_args = ['python', '-c', script]

    if USING_PYTHON2:
        invocation_args = [arg.encode(sys.stdout.encoding) for arg in invocation_args]
        env = {k: v.encode(sys.stdout.encoding) for k, v in job_env.items()}
    else:
        env = job_env

    fn_process = subprocess.Popen(invocation_args, env=env)
    fn_process.communicate()
    end_time = datetime.datetime.now()

    if fn_process.returncode != 0:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ', exited with error code ' +
                        str(fn_process.returncode) + ' after ' + str(end_time - start_time))

    # Now updating job output aggregation file with job's output
    job_output_path = os.path.join(job_env['HOME'], 'job_output.json')
    if os.path.exists(job_output_path):
        try:
            with open(job_output_path, 'r') as fd:
                job_output = json.load(fd, object_pairs_hook=collections.OrderedDict)
        except Exception as e:
            exit_with_error('Error: Could not load output of ' + job_name + ':\n' +
                            fill(str(e.__class__) + ': ' + str(e)))
    else:
        job_output = {}

    print(job_name + ' -> ' + GREEN() + 'finished running' + ENDC() + ' after ' + str(end_time - start_time))
    print(job_output_to_str(job_output, title=(BOLD() + "Output: " + ENDC()),
                            title_len=len("Output: ")).lstrip())

    with open(os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json'), 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)
    all_job_outputs[job_id] = job_output

    # Before dumping, see if any new jbors should be resolved now
    for other_job_id in all_job_outputs:
        if all_job_outputs[other_job_id] is None:
            # Skip if job is not done yet (true for ancestor jobs)
            continue
        resolve_job_references(all_job_outputs[other_job_id], all_job_outputs, should_resolve=False)

    with open(os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json'), 'wb') as fd:
        json.dump(all_job_outputs, fd, indent=4)
        fd.write(b'\n')
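# A hedged sketch of a driver draining the queue written by queue_entry_point,
# feeding each entry to run_one_entry_point above. This only illustrates the
# data flow and is not the dx-toolkit's actual scheduler; run_spec is assumed
# to come from the app's parsed dxapp.json.
job_queue_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_queue.json')
while True:
    with open(job_queue_path, 'r') as fd:
        job_queue = json.load(fd)
    if not job_queue:
        break
    entry = job_queue.pop(0)
    with open(job_queue_path, 'w') as fd:
        json.dump(job_queue, fd, indent=4)
        fd.write('\n')
    run_one_entry_point(entry['id'], entry['function'], entry['input_hash'],
                        run_spec, entry.get('depends_on', []), name=entry.get('name'))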
def generate_unique_username_email():
    r = random.randint(0, 255)
    username = "******" + str(int(time.time())) + "_" + str(r)
    email = username + "@example.com"
    return username, email
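# A minimal usage sketch of generate_unique_username_email above; where the
# values go afterwards (e.g. a user-creation API call) is left out.
username, email = generate_unique_username_email()
# username is unique per call (timestamp plus a random suffix);
# email is simply username + "@example.com"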