def setUp(self):
    """Per-test fixture: temp projects, an identity applet, a two-stage
    pipeline record wrapping it (as a DXWorkflow), and a closed record.

    # NOTE(review): the runSpec code string below is reconstructed from a
    # collapsed source line — confirm exact whitespace against VCS history.
    """
    setUpTempProjects(self)
    # Applet that simply echoes its input record back out unchanged.
    self.dxapplet = dxpy.DXApplet()
    self.dxapplet.new(name="identity-record",
                      dxapi="1.04",
                      inputSpec=[{"name": "record", "class": "record"}
                                 ],
                      outputSpec=[{"name": "record", "class": "record"}],
                      runSpec={"code": '''
@dxpy.entry_point('main')
def main(record):
    return {'record': record}''',
                               "interpreter": "python2.7"})
    # Old-style workflow: a record of type "pipeline" whose details list
    # stages; stage1's input is wired to stage0's output.
    dxrecord = dxpy.new_dxrecord(name='workflowname',
                                 details={"stages": [{"job": None,
                                                      "inputs": {},
                                                      "app": dxpy.dxlink(self.dxapplet),
                                                      "id": "stage0-id"
                                                      },
                                                     {"job": None,
                                                      "inputs": {"record": {"connectedTo": {"output": "record",
                                                                                            "stage": "stage0-id"}
                                                                            }
                                                                 },
                                                      "app": dxpy.dxlink(self.dxapplet),
                                                      "id": "stage1-id"
                                                      }],
                                          "version": 5},
                                 types=['pipeline'])
    self.workflow = dxpy.DXWorkflow(dxrecord.get_id())
    # A plain closed record for tests that need a closed data object.
    self.closedrecord = dxpy.new_dxrecord(name='a record')
    self.closedrecord.close()
def test_init_from(self):
    """init_from copies details/name/tags, while explicit kwargs override."""
    source = dxpy.new_dxrecord(details={"foo": "bar"},
                               types=["footype"],
                               tags=["footag"])
    copy = dxpy.new_dxrecord(init_from=source, types=["bartype"])
    source_desc = source.describe(incl_details=True)
    copy_desc = copy.describe(incl_details=True)
    # These fields are inherited from the source record.
    for field in ("details", "name", "tags"):
        self.assertEqual(source_desc[field], copy_desc[field])
    # types was passed explicitly at creation, so it must NOT match.
    self.assertFalse(source_desc["types"] == copy_desc["types"])
def test_describe_dxrecord(self):
    """Exercise DXRecord.describe() on a default record and on one created
    with every creation-time metadata field populated."""
    dxrecord = dxpy.new_dxrecord()
    desc = dxrecord.describe()
    self.assertEqual(desc["project"], proj_id)
    self.assertEqual(desc["id"], dxrecord.get_id())
    self.assertEqual(desc["class"], "record")
    self.assertEqual(desc["types"], [])
    self.assertTrue("created" in desc)
    self.assertEqual(desc["state"], "open")
    self.assertEqual(desc["hidden"], False)
    self.assertEqual(desc["links"], [])
    # With no explicit name, the record's name defaults to its ID.
    self.assertEqual(desc["name"], dxrecord.get_id())
    self.assertEqual(desc["folder"], "/")
    self.assertEqual(desc["tags"], [])
    self.assertTrue("modified" in desc)
    # properties and details are only returned when explicitly requested.
    self.assertFalse("properties" in desc)
    self.assertFalse("details" in desc)
    desc = dxrecord.describe(incl_properties=True)
    self.assertEqual(desc["properties"], {})
    desc = dxrecord.describe(incl_details=True)
    self.assertEqual(desc["details"], {})
    types = ["mapping", "foo"]
    tags = ["bar", "baz"]
    properties = {"project": "cancer"}
    hidden = True
    details = {"$dnanexus_link": dxrecord.get_id()}
    folder = "/a"
    name = "Name"
    # Second record: every creation-time metadata field set explicitly.
    second_dxrecord = dxpy.new_dxrecord(types=types,
                                        properties=properties,
                                        hidden=hidden,
                                        details=details,
                                        tags=tags,
                                        folder=folder,
                                        parents=True,
                                        name=name)
    # Positional flags request properties and details in a single call.
    desc = second_dxrecord.describe(True, True)
    self.assertEqual(desc["project"], proj_id)
    self.assertEqual(desc["id"], second_dxrecord.get_id())
    self.assertEqual(desc["class"], "record")
    self.assertEqual(desc["types"], types)
    self.assertTrue("created" in desc)
    self.assertEqual(desc["state"], "open")
    self.assertEqual(desc["hidden"], hidden)
    # The $dnanexus_link inside details is surfaced as an outgoing link.
    self.assertEqual(desc["links"], [dxrecord.get_id()])
    self.assertEqual(desc["name"], name)
    self.assertEqual(desc["folder"], "/a")
    self.assertEqual(desc["tags"], tags)
    self.assertTrue("modified" in desc)
    self.assertEqual(desc["properties"], properties)
    self.assertEqual(desc["details"], details)
def make_indexed_reference(job_inputs):
    """Build a BWA index for the input reference contigset and upload it as
    a hidden archive wrapped in a BwaLetterContigSetV3 record.

    Returns the new (closed) DXRecord handler.
    """
    logging.info("Indexing reference genome")
    ref_link = job_inputs['reference']['$dnanexus_link']
    run_shell("dx-contigset-to-fasta %s reference.fasta" % ref_link)
    reference = dxpy.DXRecord(ref_link)
    ref_details = reference.get_details()
    ref_name = reference.describe()['name']

    # TODO: test if the genomes near the boundary work OK
    # bwtsw is needed for genomes of ~2 GB and up; "is" suffices below that.
    if sum(ref_details['contigs']['sizes']) < 2*1024*1024*1024:
        subprocess.check_call("bwa index -a is reference.fasta", shell=True)
    else:
        subprocess.check_call("bwa index -a bwtsw reference.fasta", shell=True)

    subprocess.check_call("XZ_OPT=-0 tar -cJf reference.tar.xz reference.fasta*", shell=True)

    indexed_ref_dxfile = dxpy.upload_local_file("reference.tar.xz",
                                                hidden=True,
                                                wait_on_close=True)
    indexed_ref_record = dxpy.new_dxrecord(
        name=ref_name + " (indexed for BWA)",
        types=["BwaLetterContigSetV3"],
        details={'index_archive': dxpy.dxlink(indexed_ref_dxfile.get_id()),
                 'original_contigset': job_inputs['reference']})
    indexed_ref_record.close()

    # TODO: dxpy project workspace convenience functions
    # FIXME
    # if "projectWorkspace" in job:
    #     indexed_ref_record.clone(job["projectWorkspace"])

    return indexed_ref_record
def test_visibility_of_dxrecord(self):
    """hide()/unhide() toggle the record's hidden flag."""
    record = dxpy.new_dxrecord()
    for toggle, expected in ((record.hide, True), (record.unhide, False)):
        toggle()
        self.assertEqual(record.describe()["hidden"], expected)
def test_rename_dxrecord(self):
    """rename() immediately updates the record's name, repeatedly."""
    record = dxpy.new_dxrecord()
    for new_name in ("newname", "secondname"):
        record.rename(new_name)
        self.assertEqual(record.describe()["name"], new_name)
def test_run_dxapplet(self):
    """Run an applet and sanity-check the job description fields."""
    applet = dxpy.DXApplet()
    applet.new(name="test_applet",
               dxapi="1.04",
               inputSpec=[{"name": "chromosomes", "class": "record"},
                          {"name": "rowFetchChunk", "class": "int"}],
               outputSpec=[{"name": "mappings", "class": "record"}],
               runSpec={"code": "def main(): pass",
                        "interpreter": "python2.7",
                        "execDepends": [{"name": "python-numpy"}]})
    record = dxpy.new_dxrecord()
    record.close()
    applet_input = {"chromosomes": {"$dnanexus_link": record.get_id()},
                    "rowFetchChunk": 100}
    job = applet.run(applet_input=applet_input)
    jobdesc = job.describe()
    self.assertEqual(jobdesc["class"], "job")
    self.assertEqual(jobdesc["function"], "main")
    self.assertEqual(jobdesc["originalInput"], applet_input)
    # A top-level job is its own origin and has no parent.
    self.assertEqual(jobdesc["originJob"], jobdesc["id"])
    self.assertEqual(jobdesc["parentJob"], None)
    self.assertEqual(jobdesc["applet"], applet.get_id())
    self.assertEqual(jobdesc["project"], applet.get_proj_id())
    for key in ("state", "created", "modified", "launchedBy", "output"):
        self.assertTrue(key in jobdesc)
    job.terminate()
def test_escaping(self):
    """Tab-completion must backslash-escape shell-special characters in
    object names suggested for `dx ls`."""
    # TODO: test backslash-escaping behavior for use with dx ls
    # (aside from special characters, escape the string so that
    # "*" and "?" aren't used as part of the glob pattern, escape
    # "/")
    r = dxpy.new_dxrecord(name='my <<awesome.>> record !@#$%^&*(){}[]|;:?`')
    # Expected completion text: the same name with shell metacharacters escaped.
    self.assert_completion('dx ls my', 'my \\<\\<awesome.\\>\\> record \\!\\@#$%^\\&*\\(\\){}[]\\|\\;\\\\:?\\` ')
def find_jobs(self):
    """Launch one job and confirm find_jobs locates it using a full set of
    search filters, including describe output."""
    applet = dxpy.DXApplet()
    applet.new(name="test_applet",
               inputSpec=[{"name": "chromosomes", "class": "record"},
                          {"name": "rowFetchChunk", "class": "int"}],
               outputSpec=[{"name": "mappings", "class": "record"}],
               runSpec={"code": "def main(): pass",
                        "interpreter": "python2.7",
                        "execDepends": [{"name": "python-numpy"}]})
    record = dxpy.new_dxrecord()
    record.close()
    job_input = {"chromosomes": {"$dnanexus_link": record.get_id()},
                 "rowFetchChunk": 100}
    job = applet.run(applet_input=job_input)
    results = list(dxpy.find_jobs(launched_by='user-000000000000000000000000',
                                  applet=applet,
                                  project=applet.get_proj_id(),
                                  origin_job=job.get_id(),
                                  parent_job=None,
                                  modified_after=0,
                                  describe=True))
    self.assertEqual(len(results), 1)
    found = results[0]
    self.assertEqual(found["id"], job.get_id())
    self.assertTrue("describe" in found)
    described = found["describe"]
    self.assertEqual(described["id"], job.get_id())
    self.assertEqual(described["class"], "job")
    self.assertEqual(described["applet"], applet.get_id())
    self.assertEqual(described["project"], applet.get_proj_id())
    self.assertEqual(described["originJob"], job.get_id())
    self.assertEqual(described["parentJob"], None)
def make_indexed_reference(ref_ID):
    """Index the contigset ``ref_ID`` with bowtie2-build, upload the archive
    as a hidden file, and wrap it in a BowtieLetterContigSetV2 record.

    Returns the ID of the new (closed) record.
    """
    run_shell("dx-contigset-to-fasta %s reference.fasta" % ref_ID)
    reference = dxpy.DXRecord(ref_ID)
    ref_details = reference.get_details()
    ref_name = reference.describe()['name']

    # call bowtie2-build
    run_shell("bowtie2-build reference.fasta indexed_ref")

    # package it into an archive for uploading
    run_shell("XZ_OPT=-0 tar -cJf reference.tar.xz indexed_ref*")

    archive_file = dxpy.upload_local_file("reference.tar.xz",
                                          hidden=True,
                                          wait_on_close=True)
    index_record = dxpy.new_dxrecord(
        name=ref_name + " (indexed for Bowtie2)",
        types=["BowtieLetterContigSetV2"],
        details={'index_archive': dxpy.dxlink(archive_file.get_id()),
                 'original_contigset': dxpy.dxlink(ref_ID)})
    index_record.close()

    '''
    # TODO: dxpy project workspace convenience functions
    if "projectWorkspace" in job:
        index_record.clone(job["projectWorkspace"])
    '''

    return index_record.get_id()
def test_clone(self):
    """Cloning open objects must fail; after closing, objects and folders
    clone into the destination project."""
    dxproject = dxpy.DXProject()
    dxproject.new_folder("/a/b/c/d", parents=True)
    dxrecords = []
    for i in range(4):
        dxrecords.append(dxpy.new_dxrecord(name=("record-%d" % i)))
    # Open data objects cannot be cloned.
    with self.assertRaises(DXAPIError):
        dxproject.clone(second_proj_id,
                        destination="/",
                        objects=[dxrecords[0].get_id(), dxrecords[1].get_id()],
                        folders=["/a/b/c/d"])
    dxrecords[0].close()
    dxrecords[1].close()
    dxproject.clone(second_proj_id,
                    destination="/",
                    objects=[dxrecords[0].get_id(), dxrecords[1].get_id()],
                    folders=["/a/b/c/d"])
    second_proj = dxpy.DXProject(second_proj_id)
    listf = second_proj.list_folder()
    # BUG FIX: list.sort() returns None, so the old comparison
    # (x.sort() == y.sort()) vacuously compared None == None and could
    # never fail. Use sorted() so actual contents are compared.
    self.assertEqual(sorted(get_objects_from_listf(listf)),
                     sorted([dxrecords[0].get_id(), dxrecords[1].get_id()]))
    self.assertEqual(listf["folders"], ["/d"])
def make_indexed_reference(ref_ID):
    """Build a Bowtie2 index for contigset ``ref_ID``, upload the compressed
    archive, and return the ID of the closed record wrapping it."""
    run_shell("dx-contigset-to-fasta %s reference.fasta" % ref_ID)
    ref_details = dxpy.DXRecord(ref_ID).get_details()
    ref_name = dxpy.DXRecord(ref_ID).describe()['name']

    # call bowtie2-build
    run_shell("bowtie2-build reference.fasta indexed_ref")
    # package it into an archive for uploading
    run_shell("XZ_OPT=-0 tar -cJf reference.tar.xz indexed_ref*")

    archive = dxpy.upload_local_file("reference.tar.xz",
                                     hidden=True,
                                     wait_on_close=True)
    record_details = {
        'index_archive': dxpy.dxlink(archive.get_id()),
        'original_contigset': dxpy.dxlink(ref_ID)
    }
    indexed_ref_record = dxpy.new_dxrecord(name=ref_name + " (indexed for Bowtie2)",
                                           types=["BowtieLetterContigSetV2"],
                                           details=record_details)
    indexed_ref_record.close()

    '''
    # TODO: dxpy project workspace convenience functions
    if "projectWorkspace" in job:
        indexed_ref_record.clone(job["projectWorkspace"])
    '''

    return indexed_ref_record.get_id()
def copy_across_regions(local_path, record, dest_region, dest_proj, dest_folder):
    """Replicate an asset (archive file + AssetBundle record) into another
    region's project; skip the copy if it already exists there."""
    print("copy_across_regions {} {} {} {}:{}".format(
        local_path, record.get_id(), dest_region, dest_proj.get_id(), dest_folder))

    # check if we haven't already created this record, and uploaded the file
    existing = find_asset(dest_proj, dest_folder)
    if existing is not None:
        print("Already copied to region {}".format(dest_region))
        return AssetDesc(dest_region, existing.get_id(), dest_proj)

    # upload the archive and wrap it in a closed AssetBundle record
    dest_proj.new_folder(dest_folder, parents=True)
    uploaded = upload_local_file(local_path, dest_proj, dest_folder, hidden=True)
    new_asset = dxpy.new_dxrecord(name=record.name,
                                  types=['AssetBundle'],
                                  details={'archiveFileId': dxpy.dxlink(uploaded.get_id())},
                                  properties=record.get_properties(),
                                  project=dest_proj.get_id(),
                                  folder=dest_folder,
                                  close=True)
    return AssetDesc(dest_region, new_asset.get_id(), dest_proj)
def test_new_list_remove_folders(self):
    """Folder create/list/remove semantics, including the non-empty-folder
    error and slash normalization."""
    dxproject = dxpy.DXProject()
    listing = dxproject.list_folder()
    self.assertEqual(listing["folders"], [])
    self.assertEqual(listing["objects"], [])

    dxrecord = dxpy.new_dxrecord()
    dxproject.new_folder("/a/b/c/d", parents=True)
    listing = dxproject.list_folder()
    self.assertEqual(listing["folders"], ["/a"])
    self.assertEqual(listing["objects"], [{"id": dxrecord.get_id()}])

    # Walk down the nested folder chain one level at a time.
    listing = dxproject.list_folder("/a")
    self.assertEqual(listing["folders"], ["/a/b"])
    self.assertEqual(listing["objects"], [])
    listing = dxproject.list_folder("/a/b")
    self.assertEqual(listing["folders"], ["/a/b/c"])
    listing = dxproject.list_folder("/a/b/c")
    self.assertEqual(listing["folders"], ["/a/b/c/d"])
    listing = dxproject.list_folder("/a/b/c/d")
    self.assertEqual(listing["folders"], [])

    # Removing a non-empty folder is an error.
    with self.assertRaises(DXAPIError):
        dxproject.remove_folder("/a")
    dxproject.remove_folder("/a/b/c/d")
    # Redundant slashes are normalized by the API.
    dxproject.remove_folder("/a//b////c/")
    dxproject.remove_folder("/a/b")
    dxproject.remove_folder("/a")
    dxrecord.remove()
    listing = dxproject.list_folder()
    self.assertEqual(listing["objects"], [])
def test_set_id(self):
    """set_ids points a fresh handler at an existing record."""
    original = dxpy.new_dxrecord()
    alias = dxpy.DXRecord()
    alias.set_ids(original.get_id(), original.get_proj_id())
    self.assertEqual(alias.get_id(), original.get_id())
    self.assertEqual(alias.get_proj_id(), proj_id)
    original.remove()
def test_get_appet_with_asset(self):
    """Build an applet with both an assetDepends and a bundledDepends, then
    verify `dx get --omit-resources` reconstructs both in dxapp.json.

    # NOTE(review): the multi-line bash code_str below is reconstructed from
    # a collapsed source line — confirm exact whitespace against VCS history.
    """
    bundle_name = "test-bundle-depends.tar.gz"
    bundle_tmp_dir = tempfile.mkdtemp()
    os.mkdir(os.path.join(bundle_tmp_dir, "a"))
    with open(os.path.join(bundle_tmp_dir, 'a', 'foo.txt'), 'w') as file_in_bundle:
        file_in_bundle.write('foo\n')
    subprocess.check_call(['tar', '-czf', os.path.join(bundle_tmp_dir, bundle_name),
                           '-C', os.path.join(bundle_tmp_dir, 'a'), '.'])
    # Upload the same tarball twice: once as an ordinary bundled dependency,
    # once as the archive behind the asset record.
    bundle_file = dxpy.upload_local_file(filename=os.path.join(bundle_tmp_dir, bundle_name),
                                         project=self.project,
                                         wait_on_close=True)
    asset_file = dxpy.upload_local_file(filename=os.path.join(bundle_tmp_dir, bundle_name),
                                        project=self.project,
                                        wait_on_close=True)
    # An asset is an AssetBundle record whose details point at the archive.
    dxrecord_details = {"archiveFileId": {"$dnanexus_link": asset_file.get_id()}}
    dxrecord = dxpy.new_dxrecord(project=self.project,
                                 types=["AssetBundle"],
                                 details=dxrecord_details,
                                 name='asset-lib-test',
                                 properties={"version": "0.0.1"})
    dxrecord.close()
    asset_bundle_id = dxrecord.get_id()
    # The archive file links back to its record via the AssetBundle property.
    asset_file.set_properties({"AssetBundle": asset_bundle_id})
    code_str = """#!/bin/bash
main(){
echo 'Hello World'
}
"""
    app_spec = {
        "name": "asset_depends",
        "dxapi": "1.0.0",
        "runSpec": {
            "code": code_str,
            "interpreter": "bash",
            "assetDepends": [{"id": asset_bundle_id}],
            "bundledDepends": [{"name": bundle_name,
                                "id": {"$dnanexus_link": bundle_file.get_id()}}]
        },
        "inputSpec": [],
        "outputSpec": [],
        "version": "1.0.0"
    }
    app_dir = self.write_app_directory("asset_depends", json.dumps(app_spec))
    asset_applet_id = json.loads(run("dx build --json {app_dir}".format(app_dir=app_dir)))["id"]
    with chdir(tempfile.mkdtemp()):
        run("dx get --omit-resources " + asset_applet_id)
        self.assertTrue(os.path.exists("asset_depends"))
        # --omit-resources must not download the resources directory.
        self.assertFalse(os.path.exists(os.path.join("asset_depends", "resources")))
        self.assertTrue(os.path.exists(os.path.join("asset_depends", "dxapp.json")))
        applet_spec = json.load(open(os.path.join("asset_depends", "dxapp.json")))
        # assetDepends is regenerated from the record's name/project/version.
        self.assertEqual([{"name": "asset-lib-test",
                           "project": self.project,
                           "folder": "/",
                           "version": "0.0.1"}
                          ], applet_spec["runSpec"]["assetDepends"])
        self.assertEqual([{"name": bundle_name,
                           "id": {"$dnanexus_link": bundle_file.get_id()}}],
                         applet_spec["runSpec"]["bundledDepends"])
def test_job_from_app(self):
    """An applet-launched job spawns a subjob; verify the subjob ID gets
    recorded and the subjob completes with the expected side effect."""
    status_record = dxpy.new_dxrecord({"details": {"jobsuccess": False}})
    job_id_record = dxpy.new_dxrecord({"details": {"jobid": None}})
    applet = dxpy.new_dxapplet(codefile='test_dxjob.py')
    applet_job = applet.run({"json_dxid": status_record.get_id(),
                             "job_id_json": job_id_record.get_id()})
    applet_job.wait_on_done()
    # The applet writes the ID of the job it spawned into the record.
    child_job_id = job_id_record.get_details()["jobid"]
    self.assertIsNotNone(child_job_id)
    child_job = dxpy.DXJob(child_job_id)
    child_job.wait_on_done()
    self.assertEqual(status_record.get_details(), {"jobsuccess": True})
    status_record.remove()
    applet.remove()
def test_types_of_dxrecord(self):
    """add_types/remove_types are reflected by describe()."""
    record = dxpy.new_dxrecord()
    record.add_types(["foo", "othertype"])
    self.assertEqual(record.describe()["types"], ["foo", "othertype"])
    record.remove_types(["foo"])
    self.assertEqual(record.describe()["types"], ["othertype"])
def test_tags_of_dxrecord(self):
    """add_tags/remove_tags are reflected by describe()."""
    record = dxpy.new_dxrecord()
    record.add_tags(["foo", "othertag"])
    self.assertEqual(record.describe()["tags"], ["foo", "othertag"])
    record.remove_tags(["foo"])
    self.assertEqual(record.describe()["tags"], ["othertag"])
def test_set_properties_of_dxrecord(self):
    """set_properties merges new keys and deletes keys mapped to None."""
    record = dxpy.new_dxrecord()
    props = {"project": "cancer project", "foo": "bar"}
    record.set_properties(props)
    self.assertEqual(record.describe(True)["properties"], props)
    # Mapping a property to None removes it.
    record.set_properties({"project": None})
    self.assertEqual(record.describe(True)["properties"], {"foo": "bar"})
def setUpClass(cls):
    """Create a scratch project with two folders, one file and one record,
    then mount it via dx-mount for the FUSE tests."""
    if 'DXTEST_FUSE' not in os.environ:
        return
    proj_name = u"dxclient_test_pröject"
    cls.project_id = subprocess.check_output(
        u"dx new project '{p}' --brief".format(p=proj_name), shell=True).strip()
    dxpy.config["DX_PROJECT_CONTEXT_ID"] = cls.project_id
    dxpy.config["DX_CLI_WD"] = '/'
    cls.project = dxpy.DXProject(cls.project_id)
    # Re-initialize the config so the new project context takes effect.
    dxpy.config.__init__(suppress_warning=True)
    for folder_name in ('foo', 'bar'):
        subprocess.check_call(['dx', 'mkdir', folder_name])
    dxpy.upload_local_file(__file__, wait_on_close=True)
    dxpy.new_dxrecord(name="A/B testing")
    cls.mountpoint = tempfile.mkdtemp()
    # TODO: redirect logs to someplace in case we need to debug
    # problems in these tests
    subprocess.check_call(['dx-mount', cls.mountpoint])
def test_remove_objects(self):
    """remove_objects deletes the object; a later describe() must fail."""
    project = dxpy.DXProject()
    record = dxpy.new_dxrecord()
    self.assertEqual(get_objects_from_listf(project.list_folder()),
                     [record.get_id()])
    project.remove_objects([record.get_id()])
    self.assertEqual(project.list_folder()["objects"], [])
    with self.assertRaises(DXAPIError):
        record.describe()
def test_move(self):
    """Moving a data object updates folder listings and its describe()."""
    project = dxpy.DXProject()
    project.new_folder("/a/b/c/d", parents=True)
    record = dxpy.new_dxrecord()
    record.move("/a/b/c")
    # The object left the root folder...
    self.assertEqual(project.list_folder()["objects"], [])
    # ...and now lives in /a/b/c.
    self.assertEqual(get_objects_from_listf(project.list_folder("/a/b/c")),
                     [record.get_id()])
    self.assertEqual(record.describe()["folder"], "/a/b/c")
def setUpClass(cls):
    """Set up the FUSE fixture: new project, folders `foo`/`bar`, one
    uploaded file, one record, all mounted at a temp dir via dx-mount."""
    if 'DXTEST_FUSE' not in os.environ:
        return

    proj_name = u"dxclient_test_pröject"
    new_project_cmd = u"dx new project '{p}' --brief".format(p=proj_name)
    cls.project_id = subprocess.check_output(new_project_cmd, shell=True).strip()
    dxpy.config["DX_PROJECT_CONTEXT_ID"] = cls.project_id
    dxpy.config["DX_CLI_WD"] = '/'
    cls.project = dxpy.DXProject(cls.project_id)
    # Re-read config so the freshly set project context is picked up.
    dxpy.config.__init__(suppress_warning=True)

    subprocess.check_call(['dx', 'mkdir', 'foo'])
    subprocess.check_call(['dx', 'mkdir', 'bar'])
    dxpy.upload_local_file(__file__, wait_on_close=True)
    dxpy.new_dxrecord(name="A/B testing")

    cls.mountpoint = tempfile.mkdtemp()
    # TODO: redirect logs to someplace in case we need to debug
    # problems in these tests
    subprocess.check_call(['dx-mount', cls.mountpoint])
def find_data_objs(self):
    """find_data_objects filters correctly on object state."""
    record = dxpy.new_dxrecord()
    open_hits = list(dxpy.search.find_data_objects(state="open"))
    self.assertEqual(len(open_hits), 1)
    self.assertEqual(open_hits[0], {"project": proj_id, "id": record.get_id()})
    self.assertEqual(len(list(dxpy.search.find_data_objects(state="closed"))), 0)
    record.close()
    closed_hits = list(dxpy.search.find_data_objects(state="closed"))
    self.assertEqual(len(closed_hits), 1)
    self.assertEqual(closed_hits[0], {"project": proj_id, "id": record.get_id()})
def test_get_set_details(self):
    """Details round-trip, and $dnanexus_link values surface in links."""
    plain_details = {"foo": "bar"}
    record = dxpy.new_dxrecord()
    record.set_details(plain_details)
    self.assertEqual(record.get_details(), plain_details)
    self.assertEqual(record.describe()["links"], [])
    # Duplicate links are deduplicated in the links field.
    linked_details = [{"$dnanexus_link": record.get_id()},
                      {"$dnanexus_link": record.get_id()}]
    record.set_details(linked_details)
    self.assertEqual(record.get_details(), linked_details)
    self.assertEqual(record.describe()["links"], [record.get_id()])
def test_close_dxrecord(self):
    """After closing, hiding and details are frozen but rename still works."""
    record = dxpy.new_dxrecord()
    record.close()
    with self.assertRaises(DXAPIError):
        record.hide()
    with self.assertRaises(DXAPIError):
        record.set_details(["foo"])
    # The failed set_details must not have changed anything.
    self.assertEqual(record.get_details(), {})
    for new_name in ("newname", "secondname"):
        record.rename(new_name)
        self.assertEqual(record.describe()["name"], new_name)
def create_record(destination, file_ids, width=None, height=None):
    """
    Creates a master record for the HTML report; this doesn't contain the
    actual HTML, but reports are required to be records rather than files
    and we can link more than one HTML file to a report
    """
    [project, path, name] = parse_destination(destination)
    details = {"files": [dxpy.dxlink(file_id) for file_id in file_ids]}
    if width:
        details["width"] = width
    if height:
        details["height"] = height
    try:
        report = dxpy.new_dxrecord(project=project,
                                   folder=path,
                                   types=["Report", "HTMLReport"],
                                   details=details,
                                   name=name)
        report.close()
        return report.get_id()
    except dxpy.DXAPIError as ex:
        parser.error("Could not create an HTML report record on DNAnexus servers! ({ex})".format(ex=ex))
def test_get_handler(self):
    """get_handler resolves simple DXLinks, extended DXLinks, and IDs."""
    dxpy.set_workspace_id(self.second_proj_id)
    record = dxpy.new_dxrecord(project=self.proj_id)
    # Simple DXLink: resolves the ID, but the handler's project defaults
    # to the workspace (which is deliberately the wrong one here).
    handler = dxpy.get_handler({'$dnanexus_link': record.get_id()})
    self.assertEqual(handler.get_id(), record.get_id())
    # Default project is not going to be the correct one
    self.assertNotEqual(handler.get_proj_id(), self.proj_id)
    # Extended DXLink carries its own project.
    handler = dxpy.get_handler({'$dnanexus_link': {'id': record.get_id(),
                                                   'project': self.proj_id}})
    self.assertEqual(handler.get_id(), record.get_id())
    self.assertEqual(handler.get_proj_id(), self.proj_id)
    # Handle project IDs
    dxproject = dxpy.get_handler(self.proj_id)
def test_move(self):
    """Project.move relocates both data objects and folders."""
    dxproject = dxpy.DXProject()
    dxproject.new_folder("/a/b/c/d", parents=True)
    dxrecords = []
    for i in range(4):
        dxrecords.append(dxpy.new_dxrecord(name=("record-%d" % i)))
    dxproject.move(destination="/a",
                   objects=[dxrecords[0].get_id(), dxrecords[1].get_id()],
                   folders=["/a/b/c/d"])
    listf = dxproject.list_folder()
    # BUG FIX: list.sort() returns None, so the original comparison
    # (x.sort() == y.sort()) compared None to None and always passed.
    # sorted() returns the sorted list, making the assertion meaningful.
    self.assertEqual(sorted(get_objects_from_listf(listf)),
                     sorted([dxrecords[2].get_id(), dxrecords[3].get_id()]))
    self.assertEqual(listf["folders"], ["/a"])
    listf = dxproject.list_folder("/a")
    self.assertEqual(sorted(get_objects_from_listf(listf)),
                     sorted([dxrecords[0].get_id(), dxrecords[1].get_id()]))
    # "/a/b/c/d" became "/a/d" alongside the pre-existing "/a/b".
    self.assertEqual(listf["folders"], ["/a/b", "/a/d"])
    desc = dxrecords[0].describe()
    self.assertEqual(desc["folder"], "/a")
def makeGenomeObject():
    """Create and close a minimal single-contig ContigSet record; return its ID."""
    # NOTE: for these tests we don't upload a full sequence file (which
    # would be huge, for hg19). Importers and exporters that need to
    # look at the full sequence file can't be run on this test
    # contigset.
    sequence_file = dxpy.upload_string("", hidden=True)
    contig_details = {
        "flat_sequence_file": {"$dnanexus_link": sequence_file.get_id()},
        "contigs": {
            "offsets": [0],
            "names": ["chr1"],
            "sizes": [249250621]
        }
    }
    genome_record = dxpy.new_dxrecord()
    genome_record.set_details(contig_details)
    genome_record.add_types(["ContigSet"])
    genome_record.close()

    sequence_file.wait_on_close()
    return genome_record.get_id()
def test_clone(self):
    """A clone shares ID/metadata with the original but lives in another
    project; renames in one project don't affect the other."""
    original = dxpy.new_dxrecord(name="firstname", tags=["tag"])
    # Open objects cannot be cloned.
    with self.assertRaises(DXAPIError):
        second_dxrecord = original.clone(second_proj_id)
    original.close()
    clone = original.clone(second_proj_id)
    clone.rename("newname")
    first_desc = original.describe()
    second_desc = clone.describe()
    # A clone is the same underlying object (same ID) in a new project.
    self.assertEqual(first_desc["id"], original.get_id())
    self.assertEqual(second_desc["id"], original.get_id())
    self.assertEqual(first_desc["project"], proj_id)
    self.assertEqual(second_desc["project"], second_proj_id)
    self.assertEqual(first_desc["name"], "firstname")
    self.assertEqual(second_desc["name"], "newname")
    self.assertEqual(first_desc["tags"], ["tag"])
    self.assertEqual(second_desc["tags"], ["tag"])
    self.assertEqual(first_desc["created"], second_desc["created"])
    self.assertEqual(first_desc["state"], "closed")
    self.assertEqual(second_desc["state"], "closed")
def main():
    """Incrementally stream an Illumina run directory to DNAnexus.

    Creates one UploadSentinel record per lane, uploads RunInfo.xml (and,
    unless delayed, SampleSheet.csv), syncs tarballs of the run directory
    until the RTA-completion marker appears, finalizes the sentinel records,
    and optionally kicks off a downstream applet, workflow, or local script.

    # NOTE(review): several message strings below contained literal newlines
    # that were lost when this source was collapsed; they are reconstructed
    # here with "\\n" — confirm against VCS history.
    """
    args = parse_args()
    check_input(args)
    run_id = get_run_id(args.run_dir)

    # Set all naming conventions
    REMOTE_RUN_FOLDER = "/" + run_id + "/runs"
    REMOTE_READS_FOLDER = "/" + run_id + "/reads"
    REMOTE_ANALYSIS_FOLDER = "/" + run_id + "/analyses"

    FILE_PREFIX = "run." + run_id + ".lane."

    # Prep log & record names
    lane_info = []
    # If no lanes are specified, set lane to all, otherwise, set to array of lanes
    if not args.num_lanes:
        lanes_to_upload = ["all"]
    else:
        lanes_to_upload = [str(i) for i in range(1, args.num_lanes + 1)]

    for lane in lanes_to_upload:
        lane_prefix = FILE_PREFIX + lane
        lane_info.append({
            "lane": lane,
            "prefix": lane_prefix,
            "log_path": os.path.join(args.log_dir, lane_prefix + ".log"),
            "record_name": lane_prefix + ".upload_sentinel",
            "remote_folder": get_target_folder(REMOTE_RUN_FOLDER, lane),
            "uploaded": False
        })

    # Create upload sentinel for upload, if record already exists, use that
    done_count = 0
    for lane in lane_info:
        lane_num = lane["lane"]
        try:
            old_record = dxpy.find_one_data_object(zero_ok=True,
                                                   typename="UploadSentinel",
                                                   name=lane["record_name"],
                                                   project=args.project,
                                                   folder=lane["remote_folder"])
        except dxpy.exceptions.DXSearchError as e:
            raise_error("Encountered an error looking for %s at %s:%s. %s"
                        % (lane["record_name"], lane["remote_folder"], args.project, e))

        if old_record:
            lane["dxrecord"] = dxpy.get_handler(old_record["id"],
                                                project=old_record["project"])
            # A closed sentinel means this lane finished in a previous run.
            if lane["dxrecord"].describe()["state"] == "closed":
                print_stderr("Run %s, lane %s has already been uploaded" % (run_id, lane_num))
                lane["uploaded"] = True
                done_count += 1
        else:
            properties = {"run_id": run_id, "lanes": lane_num}
            lane["dxrecord"] = dxpy.new_dxrecord(types=["UploadSentinel"],
                                                 project=args.project,
                                                 folder=lane["remote_folder"],
                                                 parents=True,
                                                 name=lane["record_name"],
                                                 properties=properties)

        # upload RunInfo here, before uploading any data, unless it is already uploaded.
        record = lane["dxrecord"]
        properties = record.get_properties()
        runInfo = dxpy.find_one_data_object(zero_ok=True,
                                            name="RunInfo.xml",
                                            project=args.project,
                                            folder=lane["remote_folder"])
        if not runInfo:
            lane["runinfo_file_id"] = upload_single_file(args.run_dir + "/RunInfo.xml",
                                                         args.project,
                                                         lane["remote_folder"],
                                                         properties)
        else:
            lane["runinfo_file_id"] = runInfo["id"]

        # Upload samplesheet unless samplesheet-delay is specified or it is already uploaded.
        if not args.samplesheet_delay:
            sampleSheet = dxpy.find_one_data_object(zero_ok=True,
                                                    name="SampleSheet.csv",
                                                    project=args.project,
                                                    folder=lane["remote_folder"])
            if not sampleSheet:
                lane["samplesheet_file_id"] = upload_single_file(args.run_dir + "/SampleSheet.csv",
                                                                 args.project,
                                                                 lane["remote_folder"],
                                                                 properties)
            else:
                lane["samplesheet_file_id"] = sampleSheet["id"]

    if done_count == len(lane_info):
        print_stderr("EXITING: All lanes already uploaded")
        sys.exit(1)

    # Total wall-clock budget for the sequencer to finish the run.
    seconds_to_wait = (dxpy.utils.normalize_timedelta(args.run_duration) / 1000 * args.intervals_to_wait)
    print_stderr("Maximum allowable time for run to complete: %d seconds." % seconds_to_wait)

    initial_start_time = time.time()
    # While loop waiting for RTAComplete.txt or RTAComplete.xml
    while not termination_file_exists(args.novaseq, args.run_dir):
        start_time = time.time()
        run_time = start_time - initial_start_time
        # Fail if run time exceeds total time to wait
        if run_time > seconds_to_wait:
            print_stderr("EXITING: Upload failed.\nRun did not complete after %d seconds (max wait = %ds)"
                         % (run_time, seconds_to_wait))
            sys.exit(1)

        # Loop through all lanes in run directory
        for lane in lane_info:
            lane_num = lane["lane"]
            if lane["uploaded"]:
                continue
            run_sync_dir(lane, args)

        # Wait at least the minimum time interval before running the loop again
        cur_time = time.time()
        diff = cur_time - start_time
        if diff < args.sync_interval:
            print_stderr("Sleeping for %d seconds" % (int(args.sync_interval - diff)))
            time.sleep(int(args.sync_interval - diff))

    # Final synchronization, upload data, set details
    for lane in lane_info:
        if lane["uploaded"]:
            continue
        file_ids = run_sync_dir(lane, args, finish=True)
        record = lane["dxrecord"]
        properties = record.get_properties()
        lane["log_file_id"] = upload_single_file(lane["log_path"],
                                                 args.project,
                                                 lane["remote_folder"],
                                                 properties)
        # Tag every uploaded tarball with the sentinel's properties.
        for file_id in file_ids:
            dxpy.get_handler(file_id, project=args.project).set_properties(properties)
        details = {
            'run_id': run_id,
            'lanes': lane["lane"],
            'upload_thumbnails': str(args.upload_thumbnails).lower(),
            'dnanexus_path': args.project + ":" + lane["remote_folder"],
            'tar_file_ids': file_ids
        }

        # Upload sample sheet here, if samplesheet-delay specified
        if args.samplesheet_delay:
            lane["samplesheet_file_id"] = upload_single_file(args.run_dir + "/SampleSheet.csv",
                                                             args.project,
                                                             lane["remote_folder"],
                                                             properties)

        # ID to singly uploaded file (when uploaded successfully)
        if lane.get("log_file_id"):
            details.update({'log_file_id': lane["log_file_id"]})
        if lane.get("runinfo_file_id"):
            details.update({'runinfo_file_id': lane["runinfo_file_id"]})
        if lane.get("samplesheet_file_id"):
            details.update({'samplesheet_file_id': lane["samplesheet_file_id"]})

        record.set_details(details)
        # Closing the sentinel marks this lane's upload as complete.
        record.close()

    print_stderr("Run %s successfully streamed!" % (run_id))

    downstream_input = {}

    if args.downstream_input:
        try:
            input_dict = json.loads(args.downstream_input)
        except ValueError as e:
            raise_error("Failed to read downstream input as JSON string. %s.\n%s"
                        % (args.downstream_input, e))

        if not isinstance(input_dict, dict):
            raise_error("Expected a dict for downstream input. Got %s." % input_dict)

        for k, v in list(input_dict.items()):
            if not (isinstance(k, str) and (isinstance(v, str) or isinstance(v, dict))):
                raise_error("Expected (string) key and (string or dict) value pairs for downstream input. Got (%s)%s (%s)%s"
                            % (type(k), k, type(v), v))
            downstream_input[k] = v

    if args.applet:
        # project verified in check_input, assuming no change
        project = dxpy.get_handler(args.project)
        print_stderr("Initiating downstream analysis: given app(let) id %s" % args.applet)

        for info in lane_info:
            lane = info["lane"]
            record = info["dxrecord"]

            # applet verified in check_input, assume no change
            applet = dxpy.get_handler(args.applet)

            # Prepare output folder, if downstream analysis specified
            reads_target_folder = get_target_folder(REMOTE_READS_FOLDER, lane)
            print_stderr("Creating output folder %s" % (reads_target_folder))
            try:
                project.new_folder(reads_target_folder, parents=True)
            except dxpy.DXError as e:
                raise_error("Failed to create new folder %s.\n%s" % (reads_target_folder, e))

            # Decide on job name (<executable>-<run_id>)
            job_name = applet.title + "-" + run_id

            # Overwite upload_sentinel_record input of applet to the record of inc upload
            downstream_input["upload_sentinel_record"] = dxpy.dxlink(record)

            # Run specified applet
            job = applet.run(downstream_input,
                             folder=reads_target_folder,
                             project=args.project,
                             name=job_name)
            print_stderr("Initiated job %s from applet %s for lane %s" % (job, args.applet, lane))
    # Close if args.applet

    # args.workflow and args.applet are mutually exclusive
    elif args.workflow:
        # project verified in check_input, assuming no change
        project = dxpy.get_handler(args.project)
        print_stderr("Initiating downstream analysis: given workflow id %s" % args.workflow)

        for info in lane_info:
            lane = info["lane"]
            record = info["dxrecord"]

            # workflow verified in check_input, assume no change
            workflow = dxpy.get_handler(args.workflow)

            # Prepare output folder, if downstream analysis specified
            analyses_target_folder = get_target_folder(REMOTE_ANALYSIS_FOLDER, lane)
            print_stderr("Creating output folder %s" % (analyses_target_folder))
            try:
                project.new_folder(analyses_target_folder, parents=True)
            except dxpy.DXError as e:
                raise_error("Failed to create new folder %s.\n%s" % (analyses_target_folder, e))

            # Decide on job name (<executable>-<run_id>)
            job_name = workflow.title + "-" + run_id

            # Overwite upload_sentinel_record input of applet to the record of inc upload
            downstream_input["0.upload_sentinel_record"] = dxpy.dxlink(record)

            # Run specified applet
            job = workflow.run(downstream_input,
                               folder=analyses_target_folder,
                               project=args.project,
                               name=job_name)
            print_stderr("Initiated analyses %s from workflow %s for lane %s" % (job, args.workflow, lane))
    # Close if args.workflow

    if args.script:
        # script has been validated to be executable earlier, assume no change
        try:
            sub.check_call([args.script, args.run_dir])
        except sub.CalledProcessError as e:
            raise_error("Executable (%s) failed with error %d: %s" % (args.script, e.returncode, e.output))
def test_var_initialization(self):
    '''
    This test assumes a well-formed input spec and mostly just tests
    that everything compiles and the variable initialization code does
    not throw any errors.
    '''
    print("Setting current project to", self.project)
    dxpy.WORKSPACE_ID = self.project
    dxpy.PROJECT_CONTEXT_ID = self.project

    # Make some data objects for input. Only their existence (by name)
    # matters for the -i arguments below, so the handles returned by
    # applet_new/upload_string are deliberately not kept.
    dxpy.api.applet_new({"project": dxpy.WORKSPACE_ID,
                         "name": "anapplet",
                         "dxapi": "1.0.0",
                         "runSpec": {"code": "", "interpreter": "bash"}})
    dxpy.upload_string("foo", name="afile")
    dxgtable = dxpy.new_dxgtable(columns=[{"name": "int_col", "type": "int"}],
                                 name="agtable")
    dxgtable.add_rows([[3], [0]])
    dxgtable.close(block=True)
    dxrecord = dxpy.new_dxrecord(name="arecord")
    dxrecord.close()

    dxapp_json = {
        "name": "all_vars",
        "title": "all_vars",
        "summary": "all_vars",
        "dxapi": "1.0.0",
        "version": "0.0.1",
        "categories": [],
        "inputSpec": [],
        "outputSpec": []
    }
    # One required input, one optional input, and one (optional) output
    # per I/O class, covering scalars and arrays of every class.
    classes = ['applet', 'record', 'file', 'gtable',
               'boolean', 'int', 'float', 'string', 'hash',
               'array:applet', 'array:record', 'array:file', 'array:gtable',
               'array:boolean', 'array:int', 'array:float', 'array:string']
    for classname in classes:
        dxapp_json['inputSpec'].append({"name": "required_" + classname.replace(":", "_"),
                                        "class": classname,
                                        "optional": False})
        # Note: marking outputs as optional so that empty arrays
        # will be acceptable; keeping names the same (as required)
        # in order to allow pass-through from input variables
        dxapp_json['outputSpec'].append({"name": "required_" + classname.replace(":", "_"),
                                         "class": classname,
                                         "optional": True})
        dxapp_json['inputSpec'].append({"name": "optional_" + classname.replace(":", "_"),
                                        "class": classname,
                                        "optional": True})
    cmdline_args = ['-irequired_applet=anapplet',
                    '-irequired_array_applet=anapplet',
                    '-irequired_record=arecord',
                    '-irequired_array_record=arecord',
                    '-irequired_file=afile',
                    '-irequired_array_file=afile',
                    '-irequired_gtable=agtable',
                    '-irequired_array_gtable=agtable',
                    '-irequired_boolean=true',
                    '-irequired_array_boolean=true',
                    '-irequired_array_boolean=false',
                    '-irequired_int=32',
                    '-irequired_array_int=42',
                    '-irequired_float=3.4',
                    '-irequired_array_float=.42',
                    '-irequired_string=foo',
                    '-irequired_array_string=bar',
                    '-irequired_hash={"foo":"bar"}']
    for lang in supported_languages:
        appdir = create_app_dir_with_dxapp_json(dxapp_json, lang)

        # Test with bare-minimum of inputs
        output = subprocess.check_output(['dx-run-app-locally', appdir] + cmdline_args)
        print(output)
        # Verify array is printed a total of 3 times: once in each of
        # input, logs, and final output.  (Fixed: use assertEqual, not
        # the deprecated assertEquals alias, and a raw string so that
        # "\[" is a regex escape rather than an invalid string escape.)
        self.assertEqual(len(re.findall(r"required_array_boolean = \[ true, false ]", output)), 3)
        self.assertIn("App finished successfully", output)

        # See PTFM-13697 for CentOS 5 details
        if testutil.TEST_RUN_JOBS and not testutil.host_is_centos_5():
            # Now actually make it an applet and run it
            applet_name = dxapp_json['name'] + '-' + lang
            subprocess.check_output(['dx', 'build', appdir, '--destination', applet_name])
            subprocess.check_output(['dx', 'run', applet_name, '-y', '--wait'] + cmdline_args)
def test_get_appet_with_asset(self):
    # NOTE(review): "appet" in the method name looks like a typo for
    # "applet"; left unchanged since test names may be referenced by
    # CI filters or test-selection patterns.
    """Build an applet with both an assetDepends and a bundledDepends,
    then verify that ``dx get --omit-resources`` reconstructs a
    dxapp.json in which the asset dependency is expressed by
    name/project/folder/version and the bundled dependency keeps its
    direct file link, with no resources/ directory written.
    """
    bundle_name = "test-bundle-depends.tar.gz"
    bundle_tmp_dir = tempfile.mkdtemp()
    os.mkdir(os.path.join(bundle_tmp_dir, "a"))
    with open(os.path.join(bundle_tmp_dir, 'a', 'foo.txt'), 'w') as file_in_bundle:
        file_in_bundle.write('foo\n')
    # Tar up the "a" directory; this single tarball serves as both the
    # bundled file and the asset's archive file.
    subprocess.check_call([
        'tar', '-czf',
        os.path.join(bundle_tmp_dir, bundle_name), '-C',
        os.path.join(bundle_tmp_dir, 'a'), '.'
    ])
    # Upload the tarball twice: once as the plain bundled dependency,
    # once as the archive backing the AssetBundle record.
    bundle_file = dxpy.upload_local_file(filename=os.path.join(
        bundle_tmp_dir, bundle_name),
                                         project=self.project,
                                         wait_on_close=True)
    asset_file = dxpy.upload_local_file(filename=os.path.join(
        bundle_tmp_dir, bundle_name),
                                        project=self.project,
                                        wait_on_close=True)
    dxrecord_details = {"archiveFileId": {"$dnanexus_link": asset_file.get_id()}}
    dxrecord = dxpy.new_dxrecord(project=self.project,
                                 types=["AssetBundle"],
                                 details=dxrecord_details,
                                 name='asset-lib-test',
                                 properties={"version": "0.0.1"})
    dxrecord.close()
    asset_bundle_id = dxrecord.get_id()
    # Link the archive file back to its AssetBundle record.
    asset_file.set_properties({"AssetBundle": asset_bundle_id})
    code_str = """#!/bin/bash
main(){
    echo 'Hello World'
}
"""
    app_spec = {
        "name": "asset_depends",
        "dxapi": "1.0.0",
        "runSpec": {
            "code": code_str,
            "interpreter": "bash",
            "distribution": "Ubuntu",
            "release": "14.04",
            "assetDepends": [{"id": asset_bundle_id}],
            "bundledDepends": [{"name": bundle_name,
                                "id": {"$dnanexus_link": bundle_file.get_id()}}]
        },
        "inputSpec": [],
        "outputSpec": [],
        "version": "1.0.0"
    }
    app_dir = self.write_app_directory("asset_depends", json.dumps(app_spec))
    asset_applet_id = json.loads(
        run("dx build --json {app_dir}".format(app_dir=app_dir)))["id"]
    # Fetch the applet back into a fresh directory and inspect it.
    with chdir(tempfile.mkdtemp()):
        run("dx get --omit-resources " + asset_applet_id)
        self.assertTrue(os.path.exists("asset_depends"))
        # --omit-resources means no resources/ directory is written out
        self.assertFalse(
            os.path.exists(os.path.join("asset_depends", "resources")))
        self.assertTrue(
            os.path.exists(os.path.join("asset_depends", "dxapp.json")))
        with open(os.path.join("asset_depends", "dxapp.json")) as fh:
            applet_spec = json.load(fh)
        # The asset dependency is rewritten from a record ID into a
        # name/project/folder/version reference...
        self.assertEqual([{"name": "asset-lib-test",
                           "project": self.project,
                           "folder": "/",
                           "version": "0.0.1"}],
                         applet_spec["runSpec"]["assetDepends"])
        # ...while the bundled dependency keeps its direct file link.
        self.assertEqual([{"name": bundle_name,
                           "id": {"$dnanexus_link": bundle_file.get_id()}}],
                         applet_spec["runSpec"]["bundledDepends"])
def generate_report(geneBody, inner_dist, junc_ann, read_dist, read_dup, mappings, contam, names): report_details = {} # Gene Body Dist loc_in_gene = [n for n in range(100)] report_details['Gene Body Coverage'] = { "Normalized Location in Gene": loc_in_gene, "% of Reads Covering": geneBody } ######################### # Inner Distance if inner_dist != None: dxpy.download_dxfile(inner_dist, "inner_dist.txt") inner_bucket = [] inner_num_reads = [] inner_total_reads = 0 # if a bucket has less than 0.1% of reads in it then don't include it cutoff = 0.001 with open("inner_dist.txt", "r") as fh: line = fh.readline().rstrip("\n") while line != "": inner_total_reads += int(line.split()[2]) line = fh.readline().rstrip("\n") bucket_cutoff = cutoff * inner_total_reads print "Applying cutoff of: " + str( cutoff) + " for inner distance calculation" with open("inner_dist.txt", "r") as fh: line = fh.readline().rstrip("\n") while line != "": start, end, num_reads = [int(x) for x in line.split()] if num_reads > bucket_cutoff: # store center position of this bucket inner_bucket.append(int(end - ((end - start) / 2))) inner_num_reads.append(num_reads) line = fh.readline().rstrip("\n") # find total to normalize inner_total_reads = sum(inner_num_reads) print "Total reads for inner distance calculation: " + str( inner_total_reads) inner_median = None running_total = 0 inner_length_sum = 0 for i in range(len(inner_bucket)): # multiply read length by number of observations for the mean inner_length_sum += inner_bucket[i] * inner_num_reads[i] # calculate median running_total += inner_num_reads[i] if running_total >= inner_total_reads / 2 and inner_median == None: inner_median = inner_bucket[i] inner_mean = inner_length_sum / inner_total_reads print "inner distance metrics: " + " ".join( [str(inner_length_sum), str(inner_total_reads)]) # calc standard deviation std_sum = 0 for i in range(len(inner_bucket)): std_sum += ((inner_bucket[i] - inner_mean)**2) * inner_num_reads[i] std_sum /= 
inner_total_reads inner_std = int(math.sqrt(std_sum) + 0.5) report_details['Paired Read Inner Distance'] = { "Inner Distance (bp)": inner_bucket, "Count": inner_num_reads, "Mean": inner_mean, "Median": inner_median, "Standard Deviation": inner_std } ############################ # Junction Annotation dxpy.download_dxfile(junc_ann, "junc_ann.r") # initialize splicing values in case there was no splicing sj_k = 0 sj_pn = 0 sj_cn = 0 se_k = 0 se_pn = 0 se_cn = 0 if os.path.getsize("junc_ann.r") == 0: print "No splicing events found so setting all junction stats to 0" else: with open("junc_ann.r", "r") as fh: line = fh.readline() while line != "": line = line.rstrip("\n") if line.startswith("events"): # parse out the % and assign them se_pn, se_cn, se_k = [ float(n) / 100 for n in line[9:-1].split(",") ] if line.startswith("junction"): sj_pn, sj_cn, sj_k = [ float(n) / 100 for n in line[11:-1].split(",") ] line = fh.readline() report_details['Junction Annotation'] = { "Splicing Junctions": { "known": sj_k, "partial novel": sj_pn, "complete novel": sj_cn }, "Splicing Events": { "known": se_k, "partial novel": se_pn, "complete novel": se_cn } } ############################ # read duplication dxpy.download_dxfile(read_dup, "read_dup.txt") pos_copy = [] pos_num_reads = [] pos_total_reads = 0 seq_copy = [] seq_num_reads = [] seq_total_reads = 0 with open("read_dup.txt", "r") as fh: # pull of first header line = fh.readline() line = fh.readline() # read until we hit the stats for sequence based duplication while not line.startswith("Occurrence"): c, r = [int(n) for n in line.split()] pos_copy.append(c) pos_num_reads.append(float(r)) pos_total_reads += r line = fh.readline() #get next line to start with the data line = fh.readline() while line != "": c, r = [int(n) for n in line.split()] seq_copy.append(c) seq_num_reads.append(float(r)) seq_total_reads += r line = fh.readline() pos_total_reads = float(pos_total_reads) seq_total_reads = float(seq_total_reads) for i in 
range(len(pos_num_reads)): pos_num_reads[i] /= pos_total_reads for i in range(len(seq_num_reads)): seq_num_reads[i] /= seq_total_reads report_details['Read Duplication'] = { "Position Based": { "Read Occurrences": pos_copy, "% Reads": pos_num_reads }, "Sequence Based": { "Read Occurrences": seq_copy, "% Reads": seq_num_reads } } ############################ # read distribution report if read_dist != None: dxpy.download_dxfile(read_dist, "read_dist.txt") report_details['Read Distribution'] = {} with open("read_dist.txt", "r") as rd_file: report_details['Read Distribution']['Total Reads'] = int( rd_file.readline().split()[-1]) report_details['Read Distribution']['Total Tags'] = int( rd_file.readline().split()[-1]) report_details['Read Distribution']['Total Assigned Tags'] = int( rd_file.readline().split()[-1]) # pull out line of "="s rd_file.readline() # pull header line rd_file.readline() line = rd_file.readline() while not line.startswith("="): fields = line.split() report_details['Read Distribution'][fields[0]] = [ int(fields[1]), int(fields[2]), float(fields[3]) ] line = rd_file.readline() ############################# # add report of contaminations if calculated if contam != None: contam_report = [] for i in range(len(contam)): contam_report.append({ "Contaminant Name": names[i], "% Reads Mapping": contam[i] }) report_details['Contamination'] = contam_report ############################# # add link to mappings report_details['original_mappings'] = mappings report_name = dxpy.DXGTable(mappings).describe()['name'] + " RSeQC report" # create report report = dxpy.new_dxrecord(name=report_name, details=report_details, types=["Report", "RSeQC"]) report.close() return {"Report": dxpy.dxlink(report.get_id())}
def test_var_initialization(self):
    '''
    Smoke test: assumes a well-formed input spec and checks only that
    the generated variable-initialization code compiles and runs
    without raising any errors.
    '''
    print("Setting current project to", self.project)
    dxpy.WORKSPACE_ID = self.project
    dxpy.PROJECT_CONTEXT_ID = self.project

    # Create one data object per name referenced in the -i args below;
    # only their existence matters, so most handles are discarded.
    dxpy.api.applet_new({
        "project": dxpy.WORKSPACE_ID,
        "name": "anapplet",
        "dxapi": "1.0.0",
        "runSpec": {
            "code": "",
            "interpreter": "bash",
            "distribution": "Ubuntu",
            "release": "14.04"
        }
    })['id']
    dxpy.upload_string("foo", name="afile")
    arecord = dxpy.new_dxrecord(name="arecord")
    arecord.close()

    dxapp_json = {
        "name": "all_vars",
        "title": "all_vars",
        "summary": "all_vars",
        "dxapi": "1.0.0",
        "version": "0.0.1",
        "categories": [],
        "inputSpec": [],
        "outputSpec": []
    }
    io_classes = [
        'applet', 'record', 'file', 'boolean', 'int', 'float', 'string',
        'hash', 'array:applet', 'array:record', 'array:file',
        'array:boolean', 'array:int', 'array:float', 'array:string'
    ]
    for io_class in io_classes:
        required_name = "required_" + io_class.replace(":", "_")
        optional_name = "optional_" + io_class.replace(":", "_")
        dxapp_json['inputSpec'].append({
            "name": required_name,
            "class": io_class,
            "optional": False
        })
        # Note: outputs are marked optional so that empty arrays are
        # acceptable; output names match the required inputs to allow
        # pass-through from the input variables.
        dxapp_json['outputSpec'].append({
            "name": required_name,
            "class": io_class,
            "optional": True
        })
        dxapp_json['inputSpec'].append({
            "name": optional_name,
            "class": io_class,
            "optional": True
        })

    cmdline_args = [
        '-irequired_applet=anapplet', '-irequired_array_applet=anapplet',
        '-irequired_record=arecord', '-irequired_array_record=arecord',
        '-irequired_file=afile', '-irequired_array_file=afile',
        '-irequired_boolean=true', '-irequired_array_boolean=true',
        '-irequired_array_boolean=false', '-irequired_int=32',
        '-irequired_array_int=42', '-irequired_float=3.4',
        '-irequired_array_float=.42', '-irequired_string=foo',
        '-irequired_array_string=bar', '-irequired_hash={"foo":"bar"}'
    ]
    for lang in supported_languages:
        app_dir = create_app_dir_with_dxapp_json(dxapp_json, lang)
        # See PTFM-13697 for CentOS 5 details
        if not (testutil.TEST_RUN_JOBS and not testutil.host_is_centos_5()):
            continue
        # Build an applet from the generated app directory and run it.
        applet_name = dxapp_json['name'] + '-' + lang
        subprocess.check_output(
            ['dx', 'build', app_dir, '--destination', applet_name])
        subprocess.check_output(
            ['dx', 'run', applet_name, '-y', '--wait'] + cmdline_args)
def _clone_asset(record, folder, regions, project_dict):
    """
    Clone the given asset record into all of the given regions.

    Returns a dictionary mapping each newly-populated region to the
    record ID of the corresponding cloned asset ({} when there is
    nothing to do).  Regions the record already lives in are skipped.
    Failures raise: RuntimeError when the cloned file is missing or
    ambiguous, SystemExit when a region is unsupported by the copy app.

    Fixes vs. the previous version:
    - the "more than one file" error path referenced an undefined name
      (``dxfiles``), raising NameError instead of the intended
      RuntimeError;
    - ``asset_properties`` (including the ``cloned_from`` provenance
      key) was computed but never used — it is now passed as the new
      record's properties, which was evidently the intent;
    - the function now actually returns the region->record-id mapping
      its docstring always promised (callers that ignored the previous
      implicit None are unaffected).
    """
    # Get the asset record's backing archive file and its home region
    fid = record.get_details()['archiveFileId']['$dnanexus_link']
    curr_region = dxpy.describe(record.project)['region']

    # Only run once per region; the current region needs no clone
    regions = set(regions) - set([curr_region])
    if len(regions) == 0:
        # there is nothing to do
        return {}

    app_supported_regions = set(
        COPY_FILE_APP.describe()['regionalOptions'].keys())
    if len(regions - app_supported_regions) > 0:
        print('Currently no support for the following region(s): [{regions}]'.
              format(regions=', '.join(regions - app_supported_regions)),
              file=sys.stderr)
        sys.exit(1)

    # Get information about the asset
    asset_properties = record.get_properties()
    asset_properties['cloned_from'] = record.get_id()
    asset_file_name = dxpy.describe(fid)['name']
    url = dxpy.DXFile(fid).get_download_url(
        preauthenticated=True,
        project=dxpy.DXFile.NO_PROJECT_HINT,
        duration=URL_DURATION)[0]

    # setup target folders
    region2projid = {}
    for region in regions:
        dest_proj = util.get_project(project_dict[region])
        dest_proj.new_folder(folder, parents=True)
        region2projid[region] = dest_proj.get_id()
    print(region2projid)

    # Fire off a clone process for each region and wait for the cloning
    # to complete, retrying up to three times.
    for _attempt in range(3):
        jobs = _clone_to_all_regions(region2projid, regions,
                                     asset_file_name, folder, url)
        retval = _wait_for_completion(jobs)
        if retval:
            break

    # make records for each file
    region2recordid = {}
    for region in regions:
        dest_proj_id = region2projid[region]
        results = list(
            dxpy.find_data_objects(classname="file",
                                   visibility="hidden",
                                   name=asset_file_name,
                                   project=dest_proj_id,
                                   folder=folder))
        file_ids = [p["id"] for p in results]
        if len(file_ids) == 0:
            raise RuntimeError("Found no files {}:{}/{}".format(
                dest_proj_id, folder, asset_file_name))
        if len(file_ids) > 1:
            raise RuntimeError(
                "Found {} files {}:{}/{}, instead of just one".format(
                    len(file_ids), dest_proj_id, folder, asset_file_name))
        dest_asset = dxpy.new_dxrecord(
            name=record.name,
            types=['AssetBundle'],
            details={'archiveFileId': dxpy.dxlink(file_ids[0])},
            properties=asset_properties,
            project=dest_proj_id,
            folder=folder,
            close=True)
        region2recordid[region] = dest_asset.get_id()
    return region2recordid
def test_dx_jobutil_new_job(self):
    """Exercise dx-jobutil-new-job's input parsing (--test mode): each
    command-line snippet must produce the expected "input" hash in the
    emitted job request, or fail where the expected hash is None."""
    first_record = dxpy.new_dxrecord(name="first_record")
    second_record = dxpy.new_dxrecord(name="second_record")
    # Two records sharing one name, to exercise ambiguous-name handling
    dxpy.new_dxrecord(name="duplicate_name_record")
    dxpy.new_dxrecord(name="duplicate_name_record")
    # In a different project...
    third_record = dxpy.new_dxrecord(name="third_record",
                                     project=self.aux_project.get_id())
    # (snippet, expected input hash) pairs; None means the command must
    # exit with an error
    test_cases = (
        # string
        ("-ifoo=input_string", {"foo": "input_string"}),
        # string that looks like a {job,analysis} ID
        ("-ifoo=job-012301230123012301230123",
         {"foo": "job-012301230123012301230123"}),
        ("-ifoo=analysis-012301230123012301230123",
         {"foo": "analysis-012301230123012301230123"}),
        # int
        ("-ifoo=24", {"foo": 24}),
        # float
        ("-ifoo=24.5", {"foo": 24.5}),
        # json
        ('-ifoo=\'{"a": "b"}\'', {"foo": {"a": "b"}}),
        ('-ifoo=\'["a", "b"]\'', {"foo": ["a", "b"]}),
        # objectName
        ("-ifoo=first_record",
         {"foo": dxpy.dxlink(first_record.get_id(), self.project)}),
        # objectId
        ("-ifoo=" + first_record.get_id(),
         {"foo": dxpy.dxlink(first_record.get_id())}),
        # project:objectName
        ("-ifoo=" + self.aux_project.get_id() + ":third_record",
         {"foo": dxpy.dxlink(third_record.get_id(), self.aux_project.get_id())}),
        # project:objectId
        ("-ifoo=" + self.aux_project.get_id() + ":" + third_record.get_id(),
         {"foo": dxpy.dxlink(third_record.get_id(), self.aux_project.get_id())}),
        # same, but wrong project is specified
        ("-ifoo=" + self.project + ":" + third_record.get_id(),
         {"foo": dxpy.dxlink(third_record.get_id(), self.aux_project.get_id())}),
        # glob
        ("-ifoo=first*",
         {"foo": dxpy.dxlink(first_record.get_id(), self.project)}),
        # JBOR
        ("-ifoo=job-012301230123012301230123:outputfield",
         {"foo": {"$dnanexus_link": {"job": "job-012301230123012301230123",
                                     "field": "outputfield"}}}),
        # order of inputs is preserved from command line to API call
        ("-ifoo=first* -ifoo=second_record -ifoo=job-012301230123012301230123:outputfield",
         {"foo": [dxpy.dxlink(first_record.get_id(), self.project),
                  dxpy.dxlink(second_record.get_id(), self.project),
                  {"$dnanexus_link": {"job": "job-012301230123012301230123",
                                      "field": "outputfield"}}]}),
        ("-ifoo=job-012301230123012301230123:outputfield -ifoo=first_record -ifoo=second_*",
         {"foo": [{"$dnanexus_link": {"job": "job-012301230123012301230123",
                                      "field": "outputfield"}},
                  dxpy.dxlink(first_record.get_id(), self.project),
                  dxpy.dxlink(second_record.get_id(), self.project)]}),
        # if there is any ambiguity, the name is left unresolved
        ("-ifoo=duplicate_name_record", {"foo": "duplicate_name_record"}),
        ("-ifoo=*record", {"foo": "*record"}),
        # Override class
        ("-ifoo:int=24", {"foo": 24}),
        ("-ifoo:string=24", {"foo": "24"}),
        ("-ifoo:string=first_record", {"foo": "first_record"}),
        ('-ifoo:hash=\'{"a": "b"}\'', {"foo": {"a": "b"}}),
        ('-ifoo:hash=\'["a", "b"]\'', {"foo": ["a", "b"]}),
        ("-ifoo:file=first_record", None),  # Error
        ("-ifoo:int=foo", None),  # Error
        ("-ifoo:int=24.5", None),  # Error
        # Array inputs
        # implicit array notation
        ("-ifoo=24 -ifoo=25", {"foo": [24, 25]}),
        ("-ifoo=25 -ibar=1 -ifoo=24", {"foo": [25, 24], "bar": 1}),
        ("-ifoo=first_record -ifoo=second_record",
         {"foo": [dxpy.dxlink(first_record.get_id(), self.project),
                  dxpy.dxlink(second_record.get_id(), self.project)]}),
        # different types (unusual, but potentially meaningful if
        # foo is a json input)
        ("-ifoo=24 -ifoo=bar", {"foo": [24, "bar"]}),
        # explicit array notation is NOT respected (in contexts with
        # no inputSpec such as this one)
        ("-ifoo:array:int=24", {"foo": 24}),
        ("-ifoo:array:record=first_record",
         {"foo": dxpy.dxlink(first_record.get_id(), self.project)}),
    )
    # Run inside a simulated job context so bare names resolve against
    # self.project
    env = override_environment(DX_JOB_ID="job-000000000000000000000001",
                               DX_WORKSPACE_ID=self.project)
    for cmd_snippet, expected_input_hash in test_cases:
        cmd = "dx-jobutil-new-job " + cmd_snippet + " entrypointname --test"
        if expected_input_hash is None:
            # Snippets marked None must be rejected with exit code 1
            with self.assertSubprocessFailure(exit_code=1):
                run(cmd, env=env)
        else:
            output = run(cmd, env=env)
            self.assertEqual(json.loads(output),
                             {"input": expected_input_hash,
                              "function": "entrypointname"})
def test_input(self):
    """Exercise new-job input parsing: each command-line snippet must
    produce the expected "input" hash (checked via
    assertNewJobInputHash); unlike the error-path variant of this test,
    every case here is expected to succeed."""
    first_record = dxpy.new_dxrecord(name="first_record")
    second_record = dxpy.new_dxrecord(name="second_record")
    # Two records sharing one name, to exercise ambiguous-name handling
    dxpy.new_dxrecord(name="duplicate_name_record")
    dxpy.new_dxrecord(name="duplicate_name_record")
    # In a different project...
    third_record = dxpy.new_dxrecord(name="third_record",
                                     project=self.aux_project.get_id())
    # (snippet, expected input hash) pairs
    test_cases = (
        # string
        ("-ifoo=input_string", {
            "foo": "input_string"
        }),
        # string that looks like a {job,analysis} ID
        ("-ifoo=job-012301230123012301230123", {
            "foo": "job-012301230123012301230123"
        }),
        ("-ifoo=analysis-012301230123012301230123", {
            "foo": "analysis-012301230123012301230123"
        }),
        # int
        ("-ifoo=24", {
            "foo": 24
        }),
        # float
        ("-ifoo=24.5", {
            "foo": 24.5
        }),
        # json
        ('-ifoo=\'{"a": "b"}\'', {
            "foo": {
                "a": "b"
            }
        }),
        ('-ifoo=\'["a", "b"]\'', {
            "foo": ["a", "b"]
        }),
        # objectName
        ("-ifoo=first_record", {
            "foo": dxpy.dxlink(first_record.get_id(), self.project)
        }),
        # objectId
        ("-ifoo=" + first_record.get_id(), {
            "foo": dxpy.dxlink(first_record.get_id())
        }),
        # project:objectName
        ("-ifoo=" + self.aux_project.get_id() + ":third_record", {
            "foo":
            dxpy.dxlink(third_record.get_id(), self.aux_project.get_id())
        }),
        # project:objectId
        ("-ifoo=" + self.aux_project.get_id() + ":" + third_record.get_id(), {
            "foo":
            dxpy.dxlink(third_record.get_id(), self.aux_project.get_id())
        }),
        # same, but wrong project is specified
        ("-ifoo=" + self.project + ":" + third_record.get_id(), {
            "foo":
            dxpy.dxlink(third_record.get_id(), self.aux_project.get_id())
        }),
        # glob
        ("-ifoo=first*", {
            "foo": dxpy.dxlink(first_record.get_id(), self.project)
        }),
        # JBOR
        ("-ifoo=job-012301230123012301230123:outputfield", {
            "foo": {
                "$dnanexus_link": {
                    "job": "job-012301230123012301230123",
                    "field": "outputfield"
                }
            }
        }),
        # order of inputs is preserved from command line to API call
        ("-ifoo=first* -ifoo=second_record -ifoo=job-012301230123012301230123:outputfield",
         {
             "foo": [
                 dxpy.dxlink(first_record.get_id(), self.project),
                 dxpy.dxlink(second_record.get_id(), self.project), {
                     "$dnanexus_link": {
                         "job": "job-012301230123012301230123",
                         "field": "outputfield"
                     }
                 }
             ]
         }),
        ("-ifoo=job-012301230123012301230123:outputfield -ifoo=first_record -ifoo=second_*",
         {
             "foo": [{
                 "$dnanexus_link": {
                     "job": "job-012301230123012301230123",
                     "field": "outputfield"
                 }
             },
                     dxpy.dxlink(first_record.get_id(), self.project),
                     dxpy.dxlink(second_record.get_id(), self.project)]
         }),
        # if there is any ambiguity, the name is left unresolved
        ("-ifoo=duplicate_name_record", {
            "foo": "duplicate_name_record"
        }),
        ("-ifoo=*record", {
            "foo": "*record"
        }),
        # Override class
        ("-ifoo:int=24", {
            "foo": 24
        }),
        ("-ifoo:string=24", {
            "foo": "24"
        }),
        ("-ifoo:string=first_record", {
            "foo": "first_record"
        }),
        ('-ifoo:hash=\'{"a": "b"}\'', {
            "foo": {
                "a": "b"
            }
        }),
        ('-ifoo:hash=\'["a", "b"]\'', {
            "foo": ["a", "b"]
        }),
        # Array inputs
        # implicit array notation
        ("-ifoo=24 -ifoo=25", {
            "foo": [24, 25]
        }),
        ("-ifoo=25 -ibar=1 -ifoo=24", {
            "foo": [25, 24],
            "bar": 1
        }),
        ("-ifoo=first_record -ifoo=second_record", {
            "foo": [
                dxpy.dxlink(first_record.get_id(), self.project),
                dxpy.dxlink(second_record.get_id(), self.project)
            ]
        }),
        # different types (unusual, but potentially meaningful if
        # foo is a json input)
        ("-ifoo=24 -ifoo=bar", {
            "foo": [24, "bar"]
        }),
        # explicit array notation is NOT respected (in contexts with
        # no inputSpec such as this one)
        ("-ifoo:array:int=24", {
            "foo": 24
        }),
        ("-ifoo:array:record=first_record", {
            "foo": dxpy.dxlink(first_record.get_id(), self.project)
        }),
    )
    for cmd_snippet, expected_input_hash in test_cases:
        arguments_hash = {"input": expected_input_hash}
        self.assertNewJobInputHash(cmd_snippet, arguments_hash)
def test_pipeline_completion(self): dxpipeline = dxpy.new_dxrecord(name="my workflow", types=["pipeline"]) self.assert_completion("dx run my", "my workflow ")