def check_file_content(out_param_name, out_filename, tmp_fname, str_content):
    """Download a file, read it from local disk, and verify that it has the correct contents.

    :param out_param_name: key expected in the enclosing job_output dict
    :param out_filename: expected platform name of the output file
    :param tmp_fname: local scratch path used for the download
    :param str_content: expected file contents (compared with whitespace stripped)
    :raises Exception: if the key is missing or the contents do not match
    """
    if out_param_name not in job_output:
        # BUG FIX: the original raised a plain string, which is a TypeError in
        # Python 3 (string exceptions were removed); raise a real exception.
        raise Exception("Error: key {} does not appear in the job output".format(out_param_name))
    dxlink = job_output[out_param_name]

    # check that the filename gets preserved
    trg_fname = dxpy.get_handler(dxlink).name
    self.assertEqual(trg_fname, out_filename)

    # download the file and check the contents
    silent_file_remove(tmp_fname)
    dxpy.download_dxfile(dxlink, tmp_fname)
    with open(tmp_fname, "r") as fh:
        data = fh.read()
    print(data)
    # whitespace-insensitive comparison of the downloaded data
    if not (strip_white_space(data) == strip_white_space(str_content)):
        raise Exception("contents of file {} do not match".format(out_param_name))
    silent_file_remove(tmp_fname)
def copy_files(fids, projectId, folder, overwrite=False): '''Copies array of dx file dicts to project:/folder, returning new array of dx file dicts.''' newFids = [] for fid in fids: fileDict = dxpy.describe(FILES[fid]) # FILES contain dxLinks if fileDict['project'] == projectId: # cannot copy into the same project!!! # so just leave in place and pretend that we did! #proj = dxpy.DXProject(projectId) #proj.move(folder,[fid]) newFids.append(fid) continue # Check to see if file already exists. alreadyThere = find_file(folder + '/' + fileDict['name'], projectId) if alreadyThere is None or overwrite: # remove what is alreadyThere? #if alreadyThere is not None: # proj = dxpy.DXProject(projectId) # proj.remove_objects([alreadyThere]) dxFile = dxpy.get_handler(FILES[fid]) newLink = dxpy.dxlink(dxFile.clone(projectId, folder)) else: newLink = FILES(alreadyThere) if newLink == None: print "ERROR: Failed in copy of '" + fileDict['project'] + ":" + fileDict['name'] + \ "' to '" + projectId + ":" + folder + "'." sys.exit(1) newDict = dxpy.describe(newLink) FILES[newDict['id']] = newLink newFids.append(newDict['id']) return newFids
def format_result(result):
    """Optionally convert *result* into a dxpy handler and attach parent info.

    When the enclosing scope requested handlers, replace the raw describe dict
    with a dxpy handler; when grouping by parent, return the extra context too.
    """
    if return_handler:
        result = dxpy.get_handler(result['id'], project=result.get('project'))
    return result if by_parent is None else (result, by_parent, descriptions)
def list_subfolders(project, path, recurse=True):
    '''
    :param project: Project ID to use as context for the listing
    :type project: string
    :param path: Subtree root path
    :type path: string
    :param recurse: Return a complete subfolders tree
    :type recurse: boolean

    Returns a generator over the subfolders of the remote *path* (the root
    itself included) within *project*.

    Example::

        list_subfolders("project-xxxx", folder="/input")
    '''
    all_folders = dxpy.get_handler(project).describe(
        input_params={'folders': True})['folders']
    # TODO: support shell-style path globbing (i.e. /a*/c matches /ab/c but not /a/b/c)
    # return pathmatch.filter(all_folders, os.path.join(path, '*'))
    if not recurse:
        # keep only direct children: nothing after the prefix may contain '/'
        cutoff = len(path) + 1
        return (f for f in all_folders if f.startswith(path) and '/' not in f[cutoff:])
    return (f for f in all_folders if f.startswith(path))
def file_handler_from_fid(fid):
    '''Returns dx file handler from fid.

    Prefers the cached dxlink in FILES; falls back to constructing a dxlink
    from the raw file id.
    '''
    try:
        dxlink = FILES[fid]
    except KeyError:
        # BUG FIX: narrowed the bare 'except:' — only a cache miss should
        # trigger the fallback, not e.g. KeyboardInterrupt.
        dxlink = dxpy.dxlink(fid)
    return dxpy.get_handler(dxlink)
def copy_files(fids, projectId, folder, overwrite=False): '''Copies array of dx file dicts to project:/folder, returning new array of dx file dicts.''' newFids = [] for fid in fids: fileDict = dxpy.describe(FILES[fid]) # FILES contain dxLinks if fileDict['project'] == projectId: # cannot copy into the same project!!! # so just leave in place and pretend that we did! #proj = dxpy.DXProject(projectId) #proj.move(folder,[fid]) newFids.append( fid ) continue # Check to see if file already exists. alreadyThere = find_file(folder+'/'+fileDict['name'],projectId) if alreadyThere is None or overwrite: # remove what is alreadyThere? #if alreadyThere is not None: # proj = dxpy.DXProject(projectId) # proj.remove_objects([alreadyThere]) dxFile = dxpy.get_handler(FILES[fid]) newLink = dxpy.dxlink(dxFile.clone(projectId, folder)) else: newLink = FILES(alreadyThere) if newLink == None: print "ERROR: Failed in copy of '" + fileDict['project'] + ":" + fileDict['name'] + \ "' to '" + projectId + ":" + folder + "'." sys.exit(1) newDict = dxpy.describe(newLink) FILES[newDict['id']] = newLink newFids.append( newDict['id'] ) return newFids
def job_describe(job_id, key=None, verbose=False):
    '''Returns dx job's description property matching 'key'.

    With key=None the whole description dict is returned. All failure paths
    write to stderr and exit(0) so a wrapping dx script does not error out.
    '''
    try:
        dxjob = dxpy.get_handler(job_id)
    except Exception:
        # Narrowed from a bare 'except:' so Ctrl-C etc. still propagate.
        sys.stderr.write('ERROR: unable to find job: "' + job_id + '": \n')
        sys.exit(0)  # Do not error on tool run in dx script
    # Fixed misspelled local ('desciption' -> 'description').
    description = dxjob.describe()
    if not description:
        sys.stderr.write('ERROR: unable to find description of job "' + job_id + '": \n')
        sys.exit(0)  # Do not error on tool run in dx script
    if key is None:
        if verbose:
            sys.stderr.write(json.dumps(description) + '\n')
        return description
    if key not in description:
        sys.stderr.write('ERROR: unable to find "' + key + '" in description of job "' + job_id + '": \n')
        sys.exit(0)  # Do not error on tool run in dx script
    value = description[key]
    if verbose:
        sys.stderr.write(value + '\n')
    return value
def job_describe(job_id, key=None, verbose=False):
    '''Returns dx job's description property matching 'key'.

    With key=None the whole description dict is returned. All failure paths
    write a WARNING to stderr and exit(0) so a wrapping dx script does not
    error out.
    '''
    try:
        dxjob = dxpy.get_handler(job_id)
    except Exception:
        # Narrowed from a bare 'except:' so Ctrl-C etc. still propagate.
        sys.stderr.write('WARNING: unable to find job: "' + job_id + '": \n')
        sys.exit(0)  # Do not error on tool run in dx script
    # Fixed misspelled local ('desciption' -> 'description').
    description = dxjob.describe()
    if not description:
        sys.stderr.write('WARNING: unable to find description of job "' + job_id + '": \n')
        sys.exit(0)  # Do not error on tool run in dx script
    if key is None:
        if verbose:
            sys.stderr.write(json.dumps(description) + '\n')
        return description
    if key not in description:
        sys.stderr.write('WARNING: unable to find "' + key + '" in description of job "' + job_id + '": \n')
        sys.exit(0)  # Do not error on tool run in dx script
    value = description[key]
    if verbose:
        sys.stderr.write(value + '\n')
    return value
def unpack_tar(self, tar_file_dxlink): ''' DEV: Eventually integrate dx-toolkit into trajectoread repo so I can transition to using 'dx-download-all-inputs' to handle unpacking all input files. Pipeline used to store lane file dxids as project properties and then pass to "dx download" Description: Download and untar metadata and lane data files (/Data/Intensities/BaseCalls) ''' if dxpy.is_dxlink(tar_file_dxlink): file_handler = dxpy.get_handler(tar_file_dxlink) filename = file_handler.name else: print 'Error: Cannot unpack %s; not a valid DXLink object' sys.exit() # ('file-dxid', 'project-dxid') = dxpy.get_dxlink_ids(dxlink) file_dxid = dxpy.get_dxlink_ids(tar_file_dxlink)[0] project_id = dxpy.get_dxlink_ids(tar_file_dxlink)[1] # Download file from DNAnexus objectstore to virtual machine dxpy.download_dxfile(dxid=file_dxid, filename=filename, project=project_id) # Untar file ## DEV: Check if this is even in use anymore; also should have some method for ## checking what type of compression was used. ## But I don't think this is in use command = 'tar -xf %s --owner root --group root --no-same-owner' % filename self.createSubprocess(cmd=command, pipeStdout=False)
def test_dxfs_operations(self):
    """Exercise list/read/move operations through the dxfs FUSE mount."""
    # FIXME: Make the mount live or add command to refresh it with remote changes
    #subprocess.check_call(['dx', 'mkdir', 'foo'])
    #subprocess.check_call(['dx', 'mkdir', 'bar'])
    #subprocess.check_call(['dx', 'mkdir', '-p', '/bar/baz'])
    # The mount root should show the two pre-created folders plus this test file.
    self.assertEqual(set(os.listdir(self.mountpoint)), set(['foo', 'bar', os.path.basename(__file__)]))

    # Reading: file content through the mount matches the local copy.
    self.assertEqual(open(__file__).read(), open(os.path.join(self.mountpoint, __file__)).read())

    # Moving: rename in place, then move into a subfolder.
    shutil.move(os.path.join(self.mountpoint, __file__), os.path.join(self.mountpoint, __file__ + "2"))
    self.assertEqual(set(os.listdir(self.mountpoint)), set(['foo', 'bar', os.path.basename(__file__ + "2")]))
    shutil.move(os.path.join(self.mountpoint, __file__ + "2"), os.path.join(self.mountpoint, "foo"))
    self.assertEqual(set(os.listdir(os.path.join(self.mountpoint, 'foo'))), set([os.path.basename(__file__ + "2")]))
    # The move must also be reflected remotely: /foo holds exactly one object.
    folder_listing = self.project.list_folder('/foo')
    self.assertEqual(len(folder_listing['folders']), 0)
    self.assertEqual(len(folder_listing['objects']), 1)
    self.assertEqual(dxpy.get_handler(folder_listing['objects'][0]['id']).name,
                     os.path.basename(__file__ + "2"))
    # Contents must survive the move.
    self.assertEqual(open(__file__).read(),
                     open(os.path.join(self.mountpoint, 'foo', __file__ + "2")).read())
def main():
    """Start the HTTPS app server after flagging the job state as running."""
    server = my_app.create_app()
    job = dxpy.get_handler(dxpy.JOB_ID)
    # Mark this job's https-app state so external observers see it is up.
    job.set_properties({"httpsAppState": "running"})
    server.run_server(host='0.0.0.0', port=443)
    return 1
def verify_files_in_dir(path, expected_filenames, dxproj):
    """Check that every file object under *path* is named in *expected_filenames*.

    Non-file objects in the folder are ignored; an unexpected file name raises.
    """
    listing = dxproj.list_folder(folder=path, only="objects")
    for entry in listing["objects"]:
        obj = dxpy.get_handler(entry["id"])
        if not isinstance(obj, dxpy.DXFile):
            continue
        if obj.name not in expected_filenames:
            raise Exception("Error: file {} should reside in directory {}".format(obj.name, path))
def test_deepdirs(self):
    """ Tests the use of subdirectories in the output directory """

    def check_output_key(job_output, out_param_name, num_files, dxproj):
        """ check that an output key appears, and has the correct number of files """
        print("checking output for param={}".format(out_param_name))
        if out_param_name not in job_output:
            # BUG FIX: raising a plain string is a TypeError in Python 3;
            # raise a real exception instead.
            raise Exception("Error: key {} does not appear in the job output".format(out_param_name))
        dxlink_id_list = job_output[out_param_name]
        if not len(dxlink_id_list) == num_files:
            raise Exception(
                "Error: key {} should have {} files, but has {}".format(
                    out_param_name, num_files, len(dxlink_id_list)
                )
            )

    def verify_files_in_dir(path, expected_filenames, dxproj):
        """ verify that a particular set of files resides in a directory """
        dir_listing = dxproj.list_folder(folder=path, only="objects")
        for elem in dir_listing["objects"]:
            handler = dxpy.get_handler(elem["id"])
            if not isinstance(handler, dxpy.DXFile):
                continue
            if handler.name not in expected_filenames:
                raise Exception("Error: file {} should reside in directory {}".format(handler.name, path))

    with temporary_project("TestDXBashHelpers.test_app1 temporary project") as dxproj:
        env = update_environ(DX_PROJECT_CONTEXT_ID=dxproj.get_id())

        # Build the applet, patching in the bash helpers from the
        # local checkout
        applet_id = build_app_with_bash_helpers(os.path.join(TEST_APPS, "deepdirs"), dxproj.get_id())

        # Run the applet
        cmd_args = ["dx", "run", "--yes", "--brief", applet_id]
        job_id = run(cmd_args, env=env).strip()
        dxpy.DXJob(job_id).wait_on_done()

        print("Test completed successfully, checking outputs\n")

        # Assertions about the output: the expected result keys and file layout.
        job_handler = dxpy.get_handler(job_id)
        job_output = job_handler.output
        check_output_key(job_output, "genes", 8, dxproj)
        check_output_key(job_output, "phenotypes", 7, dxproj)
        check_output_key(job_output, "report", 1, dxproj)
        check_output_key(job_output, "helix", 1, dxproj)

        verify_files_in_dir("/clue", ["X_1.txt", "X_2.txt", "X_3.txt"], dxproj)
        verify_files_in_dir("/hint", ["V_1.txt", "V_2.txt", "V_3.txt"], dxproj)
        verify_files_in_dir("/clue2", ["Y_1.txt", "Y_2.txt", "Y_3.txt"], dxproj)
        verify_files_in_dir("/hint2", ["Z_1.txt", "Z_2.txt", "Z_3.txt"], dxproj)
        verify_files_in_dir("/foo/bar", ["luke.txt"], dxproj)
        verify_files_in_dir("/", ["A.txt", "B.txt", "C.txt", "num_chrom.txt"], dxproj)
def verify_files_in_dir(path, expected_filenames, dxproj):
    '''Assert that each file object in *path* carries a name from *expected_filenames*.

    Objects that are not files are skipped; an unexpected file name raises.
    '''
    for entry in dxproj.list_folder(folder=path, only="objects")["objects"]:
        obj = dxpy.get_handler(entry["id"])
        if isinstance(obj, dxpy.DXFile) and obj.name not in expected_filenames:
            raise Exception("Error: file {} should reside in directory {}".
                            format(obj.name, path))
def upload_resources(src_dir, project=None, folder='/'):
    """
    :returns: A list (possibly empty) of references to the generated archive(s)
    :rtype: list

    If it exists, archives and uploads the contents of the ``resources/``
    subdirectory of *src_dir* to a new remote file object, and returns a list
    describing a single bundled dependency in the form expected by the
    ``bundledDepends`` field of a run specification. Returns an empty list,
    if no archive was created.
    """
    applet_spec = _get_applet_spec(src_dir)

    if project is None:
        dest_project = applet_spec['project']
    else:
        dest_project = project
        applet_spec['project'] = project

    resources_dir = os.path.join(src_dir, "resources")
    # Guard clause: nothing to bundle when resources/ is absent or empty.
    if not (os.path.exists(resources_dir) and len(os.listdir(resources_dir)) > 0):
        return []

    logger.debug("Uploading in " + src_dir)
    with tempfile.NamedTemporaryFile(suffix=".tar.gz") as tar_fh:
        subprocess.check_call(['tar', '-C', resources_dir, '-czf', tar_fh.name, '.'])
        if 'folder' in applet_spec:
            try:
                dxpy.get_handler(dest_project).new_folder(applet_spec['folder'], parents=True)
            except dxpy.exceptions.DXAPIError:
                pass  # TODO: make this better
        target_folder = applet_spec.get('folder', folder)
        uploaded = dxpy.upload_local_file(tar_fh.name,
                                          wait_on_close=True,
                                          project=dest_project,
                                          folder=target_folder,
                                          hidden=True)
        return [{'name': 'resources.tar.gz', 'id': dxpy.dxlink(uploaded.get_id())}]
def main():
    """Validate the alignments table produced by the given job.

    argv[1]: job ID whose 'alignments' output is checked
    argv[2]: path to a CCDS sequence file used as ground truth
    """
    job = dxpy.DXJob(dxid=sys.argv[1])
    ccds_seqs = load_ccds_seqs(sys.argv[2])
    alignments = dxpy.get_handler(job.describe()['output']['alignments'])
    alignments_desc = alignments.describe()

    # verify basic table properties
    assert 'CrossSpeciesAlignments' in alignments_desc['types']
    assert alignments_desc['length'] == 318

    # verify table schema: every column is a string with the expected name
    cols = alignments.get_columns()
    assert len(cols) == len(expected_colnames)
    for i in xrange(len(expected_colnames)):
        assert cols[i]['type'] == 'string'
        assert cols[i]['name'] == expected_colnames[i]

    # verify table contents
    names = []
    bps = 0  # total human (gap-stripped) bases seen across all rows
    alns = [0 for i in xrange(3, len(expected_colnames))]  # per-informant aligned counts
    ids = [0 for i in xrange(3, len(expected_colnames))]   # per-informant identity counts
    for row in alignments.iterate_rows():
        ccds_id = row[1]
        names.append(ccds_id)
        # row has one extra leading element (presumably the row id — confirm)
        assert len(row) == 1 + len(expected_colnames)
        # check length of each consensus sequence
        for col in xrange(4, len(row)):
            assert len(row[col]) == len(row[3])
        # check that human sequence matches CCDS (gaps stripped, case folded)
        hs_seq = str(row[3]).translate(None, '-').upper()
        ccds_seq = ccds_seqs[ccds_id].upper()
        if len(ccds_seq) == 3 + len(hs_seq):  # stop codon
            ccds_seq = ccds_seq[:len(ccds_seq) - 3]
        if hs_seq != ccds_seq:
            print 'Sequence mismatch for {}'.format(ccds_id)
            print 'Sequence found in alignments table: {}'.format(hs_seq)
            print 'Expected sequence from CCDS: {}'.format(ccds_seq)
            assert False
        # collect alignment statistics
        bps = bps + len(hs_seq)
        for i in xrange(4, 1 + len(expected_colnames)):
            a, s = similarity(row[3], row[i])
            alns[i - 4] = alns[i - 4] + a
            ids[i - 4] = ids[i - 4] + s
    # each CCDS ID should appear exactly once
    assert len(set(names)) == 318
    print 'Alignment quality statistics (make sure these look reasonable):'
    print 'Informant\tAligned %\tIdentity %'
    for i in xrange(len(expected_colnames) - 3):
        print '{}\t{}\t{}'.format(expected_colnames[i + 3].rjust(12), alns[i] * 100 / bps, ids[i] * 100 / alns[i])
def test_get_handler(self):
    """get_handler should honor the project embedded in extended DXLinks."""
    # Make the current workspace differ from the project the record lives in,
    # so we can observe which project the returned handler defaults to.
    dxpy.set_workspace_id(self.second_proj_id)
    dxrecord = dxpy.new_dxrecord(project=self.proj_id)

    # Simple DXLink: no project info, so the handler falls back to the workspace
    dxlink = {'$dnanexus_link': dxrecord.get_id()}
    handler = dxpy.get_handler(dxlink)
    self.assertEqual(handler.get_id(), dxrecord.get_id())
    # Default project is not going to be the correct one
    self.assertNotEqual(handler.get_proj_id(), self.proj_id)

    # Extended DXLink: carries the project explicitly, which the handler must use
    dxlink = {'$dnanexus_link': {'id': dxrecord.get_id(), 'project': self.proj_id}}
    handler = dxpy.get_handler(dxlink)
    self.assertEqual(handler.get_id(), dxrecord.get_id())
    self.assertEqual(handler.get_proj_id(), self.proj_id)

    # Handle project IDs
    dxproject = dxpy.get_handler(self.proj_id)
def main():
    """Validate the alignments table produced by the given job.

    argv[1]: job ID whose 'alignments' output is checked
    argv[2]: path to a CCDS sequence file used as ground truth
    """
    job = dxpy.DXJob(dxid=sys.argv[1])
    ccds_seqs = load_ccds_seqs(sys.argv[2])
    alignments = dxpy.get_handler(job.describe()['output']['alignments'])
    alignments_desc = alignments.describe()

    # verify basic table properties
    assert 'CrossSpeciesAlignments' in alignments_desc['types']
    assert alignments_desc['length'] == 318

    # verify table schema: every column is a string with the expected name
    cols = alignments.get_columns()
    assert len(cols) == len(expected_colnames)
    for i in xrange(len(expected_colnames)):
        assert cols[i]['type'] == 'string'
        assert cols[i]['name'] == expected_colnames[i]

    # verify table contents
    names = []
    bps = 0  # total human (gap-stripped) bases across all rows
    alns = [0 for i in xrange(3, len(expected_colnames))]  # per-informant aligned counts
    ids = [0 for i in xrange(3, len(expected_colnames))]   # per-informant identity counts
    for row in alignments.iterate_rows():
        ccds_id = row[1]
        names.append(ccds_id)
        # row has one extra leading element (presumably the row id — confirm)
        assert len(row) == 1 + len(expected_colnames)
        # check length of each consensus sequence
        for col in xrange(4, len(row)):
            assert len(row[col]) == len(row[3])
        # check that human sequence matches CCDS (gaps stripped, case folded)
        hs_seq = str(row[3]).translate(None, '-').upper()
        ccds_seq = ccds_seqs[ccds_id].upper()
        if len(ccds_seq) == 3 + len(hs_seq):  # stop codon
            ccds_seq = ccds_seq[:len(ccds_seq) - 3]
        if hs_seq != ccds_seq:
            print 'Sequence mismatch for {}'.format(ccds_id)
            print 'Sequence found in alignments table: {}'.format(hs_seq)
            print 'Expected sequence from CCDS: {}'.format(ccds_seq)
            assert False
        # collect alignment statistics
        bps = bps + len(hs_seq)
        for i in xrange(4, 1 + len(expected_colnames)):
            a, s = similarity(row[3], row[i])
            alns[i - 4] = alns[i - 4] + a
            ids[i - 4] = ids[i - 4] + s
    # each CCDS ID should appear exactly once
    assert len(set(names)) == 318
    print 'Alignment quality statistics (make sure these look reasonable):'
    print 'Informant\tAligned %\tIdentity %'
    for i in xrange(len(expected_colnames) - 3):
        print '{}\t{}\t{}'.format(expected_colnames[i + 3].rjust(12), alns[i] * 100 / bps, ids[i] * 100 / alns[i])
def _list_subfolders(project, path, cached_folder_lists, recurse=True):
    """Yield subfolders of *path* in *project*, memoizing the folder list per project.

    *cached_folder_lists* maps project ID -> full folder list, so repeated calls
    hit the API at most once per project.
    """
    if project not in cached_folder_lists:
        cached_folder_lists[project] = dxpy.get_handler(project).describe(
            input_params={'folders': True}
        )['folders']
    folders = cached_folder_lists[project]
    # TODO: support shell-style path globbing (i.e. /a*/c matches /ab/c but not /a/b/c)
    # return pathmatch.filter(folders, os.path.join(path, '*'))
    if not recurse:
        # direct children only: nothing after the prefix may contain '/'
        cutoff = len(path) + 1
        return (f for f in folders if f.startswith(path) and '/' not in f[cutoff:])
    return (f for f in folders if f.startswith(path))
def _list_subfolders(project, path, cached_folder_lists, recurse=True):
    """Yield subfolders of *path* in *project*, caching each project's folder list."""
    try:
        folders = cached_folder_lists[project]
    except KeyError:
        folders = dxpy.get_handler(project).describe(
            input_params={'folders': True})['folders']
        cached_folder_lists[project] = folders
    # TODO: support shell-style path globbing (i.e. /a*/c matches /ab/c but not /a/b/c)
    # return pathmatch.filter(folders, os.path.join(path, '*'))
    if recurse:
        return (f for f in folders if f.startswith(path))
    return (f for f in folders
            if f.startswith(path) and '/' not in f[len(path) + 1:])
def test_deepdirs(self):
    ''' Tests the use of subdirectories in the output directory '''

    def check_output_key(job_output, out_param_name, num_files, dxproj):
        ''' check that an output key appears, and has the correct number of files '''
        print('checking output for param={}'.format(out_param_name))
        if out_param_name not in job_output:
            # BUG FIX: raising a plain string is a TypeError in Python 3;
            # raise a real exception instead.
            raise Exception("Error: key {} does not appear in the job output".format(out_param_name))
        dxlink_id_list = job_output[out_param_name]
        if not len(dxlink_id_list) == num_files:
            raise Exception("Error: key {} should have {} files, but has {}".
                            format(out_param_name, num_files, len(dxlink_id_list)))

    def verify_files_in_dir(path, expected_filenames, dxproj):
        ''' verify that a particular set of files resides in a directory '''
        dir_listing = dxproj.list_folder(folder=path, only="objects")
        for elem in dir_listing["objects"]:
            handler = dxpy.get_handler(elem["id"])
            if not isinstance(handler, dxpy.DXFile):
                continue
            if handler.name not in expected_filenames:
                raise Exception("Error: file {} should reside in directory {}".
                                format(handler.name, path))

    with temporary_project('TestDXBashHelpers.test_app1 temporary project') as dxproj:
        env = update_environ(DX_PROJECT_CONTEXT_ID=dxproj.get_id())

        # Build the applet, patching in the bash helpers from the
        # local checkout
        applet_id = build_app_with_bash_helpers(os.path.join(TEST_APPS, 'deepdirs'), dxproj.get_id())

        # Run the applet
        cmd_args = ['dx', 'run', '--yes', '--brief', applet_id]
        job_id = run(cmd_args, env=env).strip()
        dxpy.DXJob(job_id).wait_on_done()

        print("Test completed successfully, checking outputs\n")

        # Assertions about the output: the expected result keys and file layout.
        job_handler = dxpy.get_handler(job_id)
        job_output = job_handler.output
        check_output_key(job_output, "genes", 8, dxproj)
        check_output_key(job_output, "phenotypes", 7, dxproj)
        check_output_key(job_output, "report", 1, dxproj)
        check_output_key(job_output, "helix", 1, dxproj)

        verify_files_in_dir("/clue", ["X_1.txt", "X_2.txt", "X_3.txt"], dxproj)
        verify_files_in_dir("/hint", ["V_1.txt", "V_2.txt", "V_3.txt"], dxproj)
        verify_files_in_dir("/clue2", ["Y_1.txt", "Y_2.txt", "Y_3.txt"], dxproj)
        verify_files_in_dir("/hint2", ["Z_1.txt", "Z_2.txt", "Z_3.txt"], dxproj)
        verify_files_in_dir("/foo/bar", ["luke.txt"], dxproj)
        verify_files_in_dir("/", ["A.txt", "B.txt", "C.txt", "num_chrom.txt"], dxproj)
def cp(args):
    """Implements `dx cp`: clone source objects/folders into a destination project path."""
    dest_proj, dest_path, _none = try_call(resolve_path, args.destination, 'folder')
    if dest_path is None:
        parser.exit(1, 'Cannot copy to a hash ID\n')
    dx_dest = dxpy.get_handler(dest_proj)
    try:
        # check if the destination exists
        dx_dest.list_folder(folder=dest_path, only='folders')
    except:
        # Destination folder missing: delegate to the create-then-copy path.
        cp_to_noexistent_destination(args, dest_path, dx_dest, dest_proj)
        return

    # The destination exists, we need to copy all of the sources to it.
    if len(args.sources) == 0:
        parser.exit(1, 'No sources provided to copy to another project\n')
    src_objects = []  # data object IDs to clone
    src_folders = []  # folder paths to clone
    for source in args.sources:
        src_proj, src_folderpath, src_results = try_call(resolve_existing_path,
                                                         source,
                                                         allow_mult=True, all_mult=args.all)
        if src_proj == dest_proj:
            if is_hashid(source):
                # This is the only case in which the source project is
                # purely assumed, so give a better error message.
                parser.exit(1, fill('Error: You must specify a source project for ' + source) + '\n')
            else:
                parser.exit(1, fill('Error: A source path and the destination path resolved ' +
                                    'to the same project or container. Please specify ' +
                                    'different source and destination containers, e.g.') +
                            '\n dx cp source-project:source-id-or-path dest-project:dest-path' + '\n')
        if src_proj is None:
            parser.exit(1, fill('Error: A source project must be specified or a current ' +
                                'project set in order to clone objects between projects') + '\n')
        if src_results is None:
            # Path resolved to a folder rather than data objects.
            src_folders.append(src_folderpath)
        else:
            src_objects += [result['id'] for result in src_results]
    try:
        # NOTE: uses the last src_proj from the loop; presumably all sources
        # share one project here — confirm against callers.
        exists = dxpy.DXHTTPRequest('/' + src_proj + '/clone',
                                    {"objects": src_objects,
                                     "folders": src_folders,
                                     "project": dest_proj,
                                     "destination": dest_path})['exists']
        if len(exists) > 0:
            print(fill('The following objects already existed in the destination container ' +
                       'and were left alone:') + '\n ' + '\n '.join(exists))
    except:
        err_exit()
def resolve_project(identifier, privs="r"):
    """Resolve a project by exact name or by ID, returning a dxpy project handler.

    :param identifier: project name (exact match) or project ID
    :param privs: "r" for read access; "w" additionally requires write access
    :raises ValueError: if the project cannot be found, or is read-only when privs == "w"
    """
    project = dxpy.find_one_project(name=identifier, level="VIEW", name_mode="exact",
                                    return_handler=True, zero_ok=True)
    if project is None:  # fixed '== None' -> 'is None'
        try:
            # Fall back to treating the identifier as a project ID.
            project = dxpy.get_handler(identifier)
        except Exception:
            # Narrowed from a bare 'except:' so Ctrl-C etc. still propagate.
            logging.error("Could not find a unique project with name or id %s" % (identifier))
            raise ValueError(identifier)
    logging.debug("Project %s access level is %s" % (project.name, project.describe()["level"]))
    if privs == "w" and project.describe()["level"] == "VIEW":
        logging.error("Output project %s is read-only" % (identifier))
        raise ValueError(identifier)
    return project
def resolve_project(identifier, privs='r'):
    '''Resolve a project by exact name or by ID, returning a dxpy project handler.

    :param identifier: project name (exact match) or project ID
    :param privs: 'r' for read access; 'w' additionally requires write access
    :raises ValueError: if the project cannot be found, or is read-only when privs == 'w'
    '''
    project = dxpy.find_one_project(name=identifier, level='VIEW', name_mode='exact',
                                    return_handler=True, zero_ok=True)
    if project is None:  # fixed '== None' -> 'is None'
        try:
            # Fall back to treating the identifier as a project ID.
            project = dxpy.get_handler(identifier)
        except Exception:
            # Narrowed from a bare 'except:' so Ctrl-C etc. still propagate.
            logging.error('Could not find a unique project with name or id %s' %(identifier))
            raise ValueError(identifier)
    logging.debug('Project %s access level is %s' %(project.name, project.describe()['level']))
    if privs == 'w' and project.describe()['level'] == 'VIEW':
        logging.error('Output project %s is read-only' %(identifier))
        raise ValueError(identifier)
    return project
def check_input(args):
    """Validate CLI arguments before an incremental-upload run: API token,
    project, chained applet/workflow, the local launch script, and required
    local tools (ua, dx_sync_directory.py)."""
    dxpy.set_security_context({
        "auth_token_type": "Bearer",
        "auth_token": args.api_token})

    # Check API token and project context
    try:
        dxpy.get_handler(args.project).describe()
    except dxpy.exceptions.DXAPIError as e:
        if e.name == "InvalidAuthentication":
            raise_error("API token (%s) is not valid. %s" % (args.api_token, e))
        if e.name == "PermissionDenied":
            raise_error("Project (%s) is not valid. %s" % (args.project, e))
    except dxpy.exceptions.DXError as e:
        raise_error("Error getting project handler for project (%s). %s" % (args.project, e))

    # Check that chained downstream applet is valid
    if args.applet:
        try:
            dxpy.get_handler(args.applet).describe()
        except dxpy.exceptions.DXAPIError as e:
            raise_error("Unable to resolve applet %s. %s" %(args.applet, e))
        except dxpy.exceptions.DXError as e:
            raise_error("Error getting handler for applet (%s). %s" %(args.applet, e))

    # Check that chained downstream workflow is valid
    if args.workflow:
        try:
            dxpy.get_handler(args.workflow).describe()
        except dxpy.exceptions.DXAPIError as e:
            raise_error("Unable to resolve workflow %s. %s" %(args.workflow, e))
        except dxpy.exceptions.DXError as e:
            # NOTE(review): this message string was line-wrapped in the source
            # dump; reconstructed to match the applet branch above — confirm.
            raise_error("Error getting handler for workflow (%s). %s" %(args.workflow, e))

    # Check that executable to launch locally is executable
    if args.script:
        if not (os.path.isfile(args.script) and os.access(args.script, os.X_OK)):
            raise_error("Executable/script passed by -s: (%s) is not executable" %(args.script))

    if not args.dxpy_upload:
        print_stderr("Checking if ua is in $PATH")
        try:
            sub.check_call(['ua', '--version'],
                           stdout=open(os.devnull, 'w'), close_fds=True)
        except sub.CalledProcessError:
            raise_error("Upload agent executable 'ua' was not found in the $PATH")

    try:
        # We assume that dx_sync_directory is located in the same folder as this script
        # This is resolved by absolute path of invocation
        sub.check_call(['python', '{curr_dir}/dx_sync_directory.py'.format(curr_dir=sys.path[0]), '-h'],
                       stdout=open(os.devnull, 'w'), close_fds=True)
    except sub.CalledProcessError:
        raise_error("dx_sync_directory.py not found. Please run incremental " +
                    "upload from the directory containing incremental_upload.py "+
                    "and dx_sync_directory.py")
def main(**kwargs):
    """Export a DNAnexus GTable to CSV/TSV (stdout with -o '-', otherwise a local file)."""
    if len(kwargs) == 0:
        args = parser.parse_args(sys.argv[1:])
    else:
        args = parser.parse_args(kwargs)

    # Attempt to resolve name
    try:
        project, folderpath, entity_result = resolve_existing_path(args.path, expected='entity')
    except ResolutionError as details:
        parser.exit(1, fill(unicode(details)) + '\n')
    if entity_result is None:
        parser.exit(1, fill('Could not resolve ' + args.path + ' to a data object') + '\n')

    filename = args.output
    if filename is None:
        # Default the output name to the object name; '/' cannot appear in a
        # local filename, so URL-encode it.
        filename = entity_result['describe']['name'].replace('/', '%2F')

    dxtable = dxpy.get_handler(entity_result['id'])

    delimiter = ',' if args.csv else '\t'
    if args.output == '-':
        writer = csv.writer(sys.stdout, delimiter=delimiter)
    else:
        # Only append an extension when the name was derived from the object.
        if args.output is None and not args.no_ext:
            filename += '.csv' if args.csv else '.tsv'
        if not args.overwrite and os.path.exists(filename):
            parser.exit(1, fill('Error: path \"' + filename + '\" already exists but -f/--overwrite was not set') + '\n')
        writer = csv.writer(open(filename, 'wb'), delimiter=delimiter)
    if not args.no_header:
        # Header row encodes 'name:type' per column, plus an optional row-id column.
        writer.writerow((['__id__:int'] if args.rowid else []) +
                        [(col['name'] + ':' + col['type']) for col in dxtable.describe()['columns']])

    # Query stuff
    if args.gri is not None:
        try:
            lo = int(args.gri[1])
            hi = int(args.gri[2])
        except:
            parser.exit(1, fill('Error: the LO and HI arguments to --gri must be integers') + '\n')
        # Genomic-range query over (chromosome, lo, hi).
        gri_query = dxpy.DXGTable.genomic_range_query(args.gri[0], lo, hi, args.gri_mode, args.gri_name)
        iterator = dxtable.iterate_query_rows(query=gri_query, limit=args.limit)
    else:
        iterator = dxtable.iterate_rows(start=args.starting,
                                        end=(None if args.limit is None else args.starting + args.limit))
    for row in iterator:
        # row[0] is skipped unless --rowid was requested (presumably the row id — confirm).
        writer.writerow([unicode(item).encode('utf-8') for item in row[0 if args.rowid else 1:]])
def new_workflow(args):
    """Implements `dx new workflow`: create a workflow, optionally initialized
    from an existing workflow or analysis."""
    try_call(process_dataobject_args, args)
    try_call(process_single_dataobject_output_args, args)
    init_from = None
    if args.init is not None:
        if is_analysis_id(args.init):
            # Initializing from an analysis: pass the ID straight through.
            init_from = args.init
        else:
            # Otherwise resolve the path to an existing data object handler.
            init_project, _init_folder, init_result = try_call(resolve_existing_path,
                                                               args.init,
                                                               expected='entity')
            init_from = dxpy.get_handler(init_result['id'], project=init_project)
    if args.output is None:
        # No explicit output path: create in the current workspace/directory.
        project = dxpy.WORKSPACE_ID
        folder = dxpy.config.get("DX_CLI_WD", "/")
        name = None
    else:
        project, folder, name = try_call(dxpy.utils.resolver.resolve_path, args.output)
    if args.output_folder is not None:
        try:
            # Try to resolve to a path in the project
            _ignore, args.output_folder, _ignore = resolve_path(args.output_folder, expected='folder')
        except:
            # But if not, just use the value directly
            pass
    try:
        dxworkflow = dxpy.new_dxworkflow(title=args.title, summary=args.summary,
                                         description=args.description,
                                         output_folder=args.output_folder,
                                         project=project, name=name,
                                         tags=args.tags, types=args.types,
                                         hidden=args.hidden, properties=args.properties,
                                         details=args.details, folder=folder,
                                         parents=args.parents, init_from=init_from)
        if args.brief:
            print(dxworkflow.get_id())
        else:
            dxpy.utils.describe.print_desc(dxworkflow.describe(incl_properties=True, incl_details=True),
                                           args.verbose)
    except:
        err_exit()
def cp(args):
    """Implements `dx cp`: clone source objects/folders into a destination
    project path, raising DXCLIError on user errors."""
    dest_proj, dest_path, _none = try_call(resolve_path, args.destination, expected='folder')
    if dest_path is None:
        raise DXCLIError('Cannot copy to a hash ID')
    dx_dest = dxpy.get_handler(dest_proj)
    try:
        # check if the destination exists
        dx_dest.list_folder(folder=dest_path, only='folders')
    except:
        # Destination folder missing: delegate to the create-then-copy path.
        cp_to_noexistent_destination(args, dest_path, dx_dest, dest_proj)
        return

    # The destination exists, we need to copy all of the sources to it.
    if len(args.sources) == 0:
        raise DXCLIError('No sources provided to copy to another project')
    src_objects = []  # data object IDs to clone
    src_folders = []  # folder paths to clone
    for source in args.sources:
        src_proj, src_folderpath, src_results = try_call(resolve_existing_path,
                                                         source,
                                                         allow_mult=True, all_mult=args.all)
        if src_proj == dest_proj:
            if is_hashid(source):
                # This is the only case in which the source project is
                # purely assumed, so give a better error message.
                raise DXCLIError(fill('Error: You must specify a source project for ' + source))
            else:
                raise DXCLIError(fill('Error: A source path and the destination path resolved ' +
                                      'to the same project or container. Please specify ' +
                                      'different source and destination containers, e.g.') +
                                 '\n dx cp source-project:source-id-or-path dest-project:dest-path')
        if src_proj is None:
            raise DXCLIError(fill('Error: A source project must be specified or a current ' +
                                  'project set in order to clone objects between projects'))
        if src_results is None:
            # Path resolved to a folder rather than data objects.
            src_folders.append(src_folderpath)
        else:
            src_objects += [result['id'] for result in src_results]
    try:
        # NOTE: uses the last src_proj from the loop; presumably all sources
        # share one project here — confirm against callers.
        exists = dxpy.DXHTTPRequest('/' + src_proj + '/clone',
                                    {"objects": src_objects,
                                     "folders": src_folders,
                                     "project": dest_proj,
                                     "destination": dest_path})['exists']
        if len(exists) > 0:
            print(fill('The following objects already existed in the destination container ' +
                       'and were left alone:') + '\n ' + '\n '.join(exists))
    except:
        err_exit()
def add_file(iname, subdir, value):
    """If *value* is a dxlink to a file, queue it for download under *iname*.

    Appends a download descriptor to the enclosing ``files`` map and records
    the target directory in ``dirs``; non-link and non-file values are ignored.
    """
    if not dxpy.is_dxlink(value):
        return
    fh = dxpy.get_handler(value)
    if not isinstance(fh, dxpy.DXFile):
        return
    target_dir = iname if subdir is None else os.path.join(iname, subdir)
    files[iname].append({'trg_fname': os.path.join(target_dir, make_unix_filename(fh.name)),
                         'handler': fh,
                         'src_file_id': fh.id})
    dirs.append(target_dir)
def upload_resources(src_dir, project=None, folder='/'):
    """
    :returns: A list (possibly empty) of references to the generated archive(s)
    :rtype: list

    If it exists, archives and uploads the contents of the ``resources/``
    subdirectory of *src_dir* to a new remote file object, and returns a list
    describing a single bundled dependency in the form expected by the
    ``bundledDepends`` field of a run specification. Returns an empty list,
    if no archive was created.
    """
    applet_spec = _get_applet_spec(src_dir)

    if project is None:
        dest_project = applet_spec['project']
    else:
        dest_project = project
        applet_spec['project'] = project

    resources_dir = os.path.join(src_dir, "resources")
    has_resources = os.path.exists(resources_dir) and len(os.listdir(resources_dir)) > 0
    if not has_resources:
        return []

    logger.debug("Uploading in " + src_dir)
    with tempfile.NamedTemporaryFile(suffix=".tar.gz") as tar_fh:
        # Archive the entire resources/ tree into the temp file.
        subprocess.check_call(['tar', '-C', resources_dir, '-czf', tar_fh.name, '.'])
        if 'folder' in applet_spec:
            try:
                dxpy.get_handler(dest_project).new_folder(applet_spec['folder'], parents=True)
            except dxpy.exceptions.DXAPIError:
                pass  # TODO: make this better
        target_folder = applet_spec.get('folder', folder)
        archive = dxpy.upload_local_file(tar_fh.name,
                                         wait_on_close=True,
                                         project=dest_project,
                                         folder=target_folder,
                                         hidden=True)
        return [{'name': 'resources.tar.gz', 'id': dxpy.dxlink(archive.get_id())}]
def analysis_describe_with_retry(analysis_id_or_handler):
    """Describe an analysis, waiting until all stage executions are populated.

    :param analysis_id_or_handler: analysis ID string or an analysis handler
    :returns: the analysis describe hash once every stage's execution carries
              an 'executable' field
    :raises IOError: if the fields are not populated within ~100 seconds
    """
    if isinstance(analysis_id_or_handler, basestring):
        handler = dxpy.get_handler(analysis_id_or_handler)
    else:
        handler = analysis_id_or_handler
    # All the describe fields may not be available immediately. Wait
    # until they have been populated.
    for _ in range(200):  # Don't wait an unbounded amount of time
        desc = handler.describe()
        # Sufficient to look for any field, other than 'id', that is
        # present in all job describe hashes
        if all('executable' in stage['execution'] for stage in desc['stages']):
            return desc
        time.sleep(0.5)
    # BUG FIX: use handler.get_id() -- the original called get_id() on
    # analysis_id_or_handler, which raises AttributeError when a plain ID
    # string was passed, masking the intended timeout error.
    raise IOError('Timed out while waiting for ' + handler.get_id() +
                  ' to have all jobs populated')
def get_dxfile(filePath, project=None):
    '''Returns dxfile object.

    *filePath* may be a JSON dxlink string, a file ID, or a name/path; each
    resolution strategy is attempted in turn, finally falling back to the
    current project from the environment.  On total failure an error is
    written to stderr and the process exits with status 0 (deliberate: tool
    runs inside dx scripts must not propagate a failing status).
    '''
    dxfile = None
    # A quoted dxlink JSON blob may be passed instead of a plain path/ID.
    try:
        dxlink = json.loads(filePath.strip("'"))
    except Exception:
        dxlink = None

    def _fail():
        # Single exit point for the unresolvable case.
        sys.stderr.write('ERROR: unable to find file "' + filePath + '": \n')
        sys.exit(0)  # Do not error on tool run in dx script

    # Carry the project only when one was supplied, so the project and
    # no-project resolution paths share the same code (the original
    # duplicated the whole cascade).
    kwargs = {} if project is None else {'project': project}
    try:
        if dxlink is not None:
            dxfile = dxpy.get_handler(dxlink, **kwargs)
        else:
            dxfile = dxpy.get_handler(filePath, **kwargs)
    except Exception:
        try:
            dxfile = dxpy.get_handler(dxpy.dxlink(filePath, **kwargs))
        except Exception:
            try:
                proj_id = env_get_current_project_id()
                dxfile = dxpy.DXFile(filePath, project=proj_id)
            except Exception:
                _fail()

    if dxfile is None:
        _fail()
    return dxfile
def add_file(iname, subdir, value):
    """Queue *value* for download under input *iname* if it links to a file.

    The destination directory (optionally extended by *subdir*) is appended
    to ``dirs`` and a per-file descriptor to ``files[iname]``; anything that
    is not a dxlink to a DXFile is skipped silently.
    """
    if dxpy.is_dxlink(value):
        handler = dxpy.get_handler(value)
        if isinstance(handler, dxpy.DXFile):
            dest_dir = iname
            if subdir is not None:
                dest_dir = os.path.join(dest_dir, subdir)
            files[iname].append({
                'trg_fname': os.path.join(dest_dir, make_unix_filename(handler.name)),
                'handler': handler,
                'src_file_id': handler.id,
            })
            dirs.append(dest_dir)
def get_dxfile(filePath, project=None):
    '''Returns dxfile object.

    *filePath* may be a JSON dxlink string, a file ID, or a name/path; each
    resolution strategy is attempted in turn, finally falling back to the
    current project from the environment.  On total failure a warning is
    written to stderr and the process exits with status 0 (deliberate: tool
    runs inside dx scripts must not propagate a failing status).
    '''
    dxfile = None
    # A quoted dxlink JSON blob may be passed instead of a plain path/ID.
    try:
        dxlink = json.loads(filePath.strip("'"))
    except Exception:
        dxlink = None

    def _fail():
        # Single exit point for the unresolvable case.
        sys.stderr.write('WARNING: unable to find file "' + filePath + '": \n')
        sys.exit(0)  # Do not error on tool run in dx script

    # Carry the project only when one was supplied, so the project and
    # no-project resolution paths share the same code (the original
    # duplicated the whole cascade).
    kwargs = {} if project is None else {'project': project}
    try:
        if dxlink is not None:
            dxfile = dxpy.get_handler(dxlink, **kwargs)
        else:
            dxfile = dxpy.get_handler(filePath, **kwargs)
    except Exception:
        try:
            dxfile = dxpy.get_handler(dxpy.dxlink(filePath, **kwargs))
        except Exception:
            try:
                proj_id = env_get_current_project_id()
                dxfile = dxpy.DXFile(filePath, project=proj_id)
            except Exception:
                _fail()

    if dxfile is None:
        _fail()
    return dxfile
def new_workflow(args):
    """Handle ``dx new workflow``: create a workflow from parsed CLI args.

    Optionally initializes the new workflow from an existing analysis ID or
    workflow path (--init), resolves the output destination, and prints
    either the new ID (--brief) or a full description.
    """
    try_call(process_dataobject_args, args)
    try_call(process_single_dataobject_output_args, args)
    init_from = None
    if args.init is not None:
        # An analysis ID is passed through as-is; anything else must resolve
        # to an existing data object whose handler seeds the new workflow.
        if is_analysis_id(args.init):
            init_from = args.init
        else:
            init_project, _init_folder, init_result = try_call(resolve_existing_path,
                                                               args.init,
                                                               expected="entity")
            init_from = dxpy.get_handler(init_result["id"], project=init_project)
    if args.output is None:
        # Default destination: current workspace and CLI working directory.
        project = dxpy.WORKSPACE_ID
        folder = get_env_var("DX_CLI_WD", "/")
        name = None
    else:
        project, folder, name = dxpy.utils.resolver.resolve_path(args.output)
    if args.output_folder is not None:
        try:
            # Try to resolve to a path in the project
            _ignore, args.output_folder, _ignore = resolve_path(args.output_folder,
                                                                expected="folder")
        except:
            # But if not, just use the value directly
            pass
    try:
        dxworkflow = dxpy.new_dxworkflow(
            title=args.title,
            summary=args.summary,
            description=args.description,
            output_folder=args.output_folder,
            project=project,
            name=name,
            tags=args.tags,
            types=args.types,
            hidden=args.hidden,
            properties=args.properties,
            details=args.details,
            folder=folder,
            parents=args.parents,
            init_from=init_from,
        )
        if args.brief:
            print(dxworkflow.get_id())
        else:
            dxpy.utils.describe.print_desc(dxworkflow.describe(incl_properties=True,
                                                               incl_details=True),
                                           args.verbose)
    except:
        err_exit()
def main(token):
    """Authenticate with *token*, resolve the factory project, workflow and
    input files, and launch the RNA-seq pipeline (Python 2 demo script)."""
    # Configure dxpy authentication
    dxpy.set_security_context({'auth_token_type': 'Bearer', 'auth_token': token})

    # Resolve FACTORY_PROJECT by ID
    proj = dxpy.DXProject(FACTORY_PROJECT)
    print 'Resolved project:', proj.describe()['name'], proj.get_id()

    # Set FACTORY_PROJECT as the workspace for subsequent operations
    # (sort of like the current working directory)
    dxpy.set_workspace_id(FACTORY_PROJECT)

    # Resolve the workflow by name. (Could also store ID like the project)
    wf = list(dxpy.search.find_data_objects(classname="workflow",
                                            name="RNA-seq pipeline",
                                            return_handler=True))[0]
    print 'Resolved workflow:', wf.describe()['name'], wf.get_id()

    # TODO: Stage the inputs. Here we find them in the IN folder
    left_reads = list(dxpy.search.find_data_objects(classname="file",
                                                    name="ENCFF001JPX.1k.fastq.gz",
                                                    folder="/IN",
                                                    return_handler=True))[0]
    print 'Resolved left reads:', left_reads.describe()['name'], left_reads.get_id()
    right_reads = list(dxpy.search.find_data_objects(classname="file",
                                                     name="ENCFF001JQB.1k.fastq.gz",
                                                     folder="/IN",
                                                     return_handler=True))[0]
    print 'Resolved right reads:', right_reads.describe()['name'], right_reads.get_id()

    # Launch the workflow; stage-0 inputs are addressed by '0.<name>' keys.
    analysis = wf.run({'0.fastqs': [dxpy.dxlink(left_reads.get_id())],
                       '0.fastq_pairs': [dxpy.dxlink(right_reads.get_id())]})
    print 'Launched analysis:', analysis.get_id()
    print 'Analysis state:', analysis.describe()['state']

    # TODO: Poll for (or come back when) analysis state 'done' or 'failed'.
    # Handle any failures.
# Cooking-show-style substitution with completed analysis analysis = dxpy.DXAnalysis(COMPLETED_ANALYSIS) print 'Analysis state:', analysis.describe()['state'] # Enumerate outputs print 'Analysis outputs:' for one_output_name, one_output_link in analysis.describe()['output'].iteritems(): one_output = dxpy.get_handler(one_output_link) # one_output : dxpy.DXFile one_file_name = one_output.describe()['name'] one_file_url, _ = one_output.get_download_url(preauthenticated=True, filename=one_file_name) print one_file_name, one_file_url
def resolve_dx_file(identifier):
    """Resolve *identifier* (a DNAnexus ID or a file name) to a file handler.

    First treats *identifier* as an object ID; if that fails, searches for a
    uniquely-named file.  Returns the handler, or None (after logging an
    error) when no unique match exists.
    """
    try:
        return dxpy.get_handler(identifier)
    except dxpy.DXError:
        pass
    try:
        return dxpy.find_one_data_object(
            classname='file',
            name=identifier,
            return_handler=True,
            zero_ok=False,
            more_ok=False)
    except dxpy.DXSearchError:
        logging.error('Failed to resolve control %s to unique dx object. ID or name does not exist or multiple files of that name were found.' % (str(identifier)))
        return None
def wait_for_depends_on(depends_on, all_job_outputs):
    """Block until every dependency in *depends_on* has finished/closed.

    Local-job IDs are checked against *all_job_outputs*, platform job IDs are
    waited on via DXJob, and anything else is treated as a data object to be
    waited on until closed.  Any failure is re-raised with the offending ID.
    """
    # Wait for depends_on and any data objects in the input to close
    if not depends_on:
        return
    print(fill('Processing dependsOn and any DNAnexus links to closing objects in the input'))
    for dep_id in depends_on:
        try:
            print(' Waiting for ' + dep_id + '...')
            if dep_id.startswith('localjob'):
                if all_job_outputs.get(dep_id) is None:
                    raise Exception('Job ' + dep_id + ' could not be found in local finished jobs')
            elif dep_id.startswith('job'):
                dxpy.DXJob(dep_id).wait_on_done()
            else:
                handler = dxpy.get_handler(dep_id)
                handler.describe()  # raises if the object does not exist
                handler._wait_on_close()
        except Exception as exc:
            raise Exception('Could not wait for ' + dep_id + ': ' + str(exc))
def new_workflow(args):
    """Handle ``dx new workflow`` (Python 2 variant): create a workflow from
    parsed CLI args, optionally initializing it from an existing entity."""
    try_call(process_dataobject_args, args)
    try_call(process_single_dataobject_output_args, args)
    init_from = None
    if args.init is not None:
        try:
            # Resolve --init to an existing entity; fall back to the raw
            # value (e.g. an analysis ID) if resolution fails.
            init_project, init_folder, init_result = try_call(resolve_existing_path,
                                                              args.init,
                                                              expected='entity')
            init_from = dxpy.get_handler(init_result['id'], project=init_project)
        except:
            init_from = args.init
    if args.output is None:
        # Default destination: current workspace and CLI working directory.
        project = dxpy.WORKSPACE_ID
        folder = os.environ.get('DX_CLI_WD', '/')
        name = None
    else:
        project, folder, name = dxpy.utils.resolver.resolve_path(args.output)
    if args.output_folder is not None:
        try:
            # Try to resolve to a path in the project
            ignore, args.output_folder, ignore2 = resolve_path(args.output_folder,
                                                               expected='folder')
        except:
            # But if not, just use the value directly
            pass
    try:
        dxworkflow = dxpy.new_dxworkflow(title=args.title,
                                         summary=args.summary,
                                         description=args.description,
                                         output_folder=args.output_folder,
                                         project=project,
                                         name=name,
                                         tags=args.tags,
                                         types=args.types,
                                         hidden=args.hidden,
                                         properties=args.properties,
                                         details=args.details,
                                         folder=folder,
                                         parents=args.parents,
                                         init_from=init_from)
        if args.brief:
            print dxworkflow.get_id()
        else:
            dxpy.utils.describe.print_desc(dxworkflow.describe(incl_properties=True,
                                                               incl_details=True),
                                           args.verbose)
    except:
        err_exit()
def check_file_content(out_param_name, out_filename, tmp_fname, str_content):
    """
    Download a file, read it from local disk, and verify that it has the
    correct contents.

    :param out_param_name: key expected in the enclosing job_output dict
    :param out_filename: expected remote filename of the output file
    :param tmp_fname: local scratch path used for the download
    :param str_content: expected file contents (compared ignoring whitespace)
    :raises Exception: if the key is missing or the contents do not match
    """
    if out_param_name not in job_output:
        # BUG FIX: the original executed `raise "<string>"`, which is a
        # TypeError on Python 3 (exceptions must derive from BaseException);
        # wrap the same message in a proper Exception.
        raise Exception("Error: key {} does not appear in the job output".format(out_param_name))
    dxlink = job_output[out_param_name]

    # check that the filename gets preserved
    trg_fname = dxpy.get_handler(dxlink).name
    self.assertEqual(trg_fname, out_filename)

    # download the file and check the contents
    silent_file_remove(tmp_fname)
    dxpy.download_dxfile(dxlink, tmp_fname)
    with open(tmp_fname, "r") as fh:
        data = fh.read()
        print(data)
        if not (strip_white_space(data) == strip_white_space(str_content)):
            raise Exception("contents of file {} do not match".format(out_param_name))
    silent_file_remove(tmp_fname)
def test_dxfs_operations(self):
    """Exercise basic filesystem operations (list, read, move) against the
    dxfs FUSE mount at self.mountpoint, verifying each against the remote
    project state."""
    # FIXME: Make the mount live or add command to refresh it with remote changes
    #subprocess.check_call(['dx', 'mkdir', 'foo'])
    #subprocess.check_call(['dx', 'mkdir', 'bar'])
    #subprocess.check_call(['dx', 'mkdir', '-p', '/bar/baz'])
    self.assertEqual(set(os.listdir(self.mountpoint)),
                     set(['foo', 'bar', os.path.basename(__file__)]))

    # Reading: the mounted copy of this test file must match the local one.
    self.assertEqual(open(__file__).read(),
                     open(os.path.join(self.mountpoint, __file__)).read())

    # Moving: rename within the mount root, then move into a subfolder.
    shutil.move(os.path.join(self.mountpoint, __file__),
                os.path.join(self.mountpoint, __file__+"2"))
    self.assertEqual(set(os.listdir(self.mountpoint)),
                     set(['foo', 'bar', os.path.basename(__file__+"2")]))
    shutil.move(os.path.join(self.mountpoint, __file__+"2"),
                os.path.join(self.mountpoint, "foo"))
    self.assertEqual(set(os.listdir(os.path.join(self.mountpoint, 'foo'))),
                     set([os.path.basename(__file__+"2")]))
    # Cross-check the move against the remote folder listing.
    folder_listing = self.project.list_folder('/foo')
    self.assertEqual(len(folder_listing['folders']), 0)
    self.assertEqual(len(folder_listing['objects']), 1)
    self.assertEqual(dxpy.get_handler(folder_listing['objects'][0]['id']).name,
                     os.path.basename(__file__+"2"))
    self.assertEqual(open(__file__).read(),
                     open(os.path.join(self.mountpoint, 'foo', __file__+"2")).read())
def list_subfolders(project, path, recurse=True):
    '''
    :param project: Project ID to use as context for the listing
    :type project: string
    :param path: Subtree root path
    :type path: string
    :param recurse: Return a complete subfolders tree
    :type recurse: boolean

    Returns a generator over the subfolders of the remote *path* (the root
    itself included) within *project*.

    Example::

        list_subfolders("project-xxxx", folder="/input")
    '''
    # A single describe call yields the project's complete folder tree.
    all_folders = dxpy.get_handler(project).describe(input_params={'folders': True})['folders']
    # TODO: support shell-style path globbing (i.e. /a*/c matches /ab/c but not /a/b/c)
    # return pathmatch.filter(all_folders, os.path.join(path, '*'))
    if recurse:
        return (folder for folder in all_folders if folder.startswith(path))
    # Immediate children only: nothing past the root prefix may contain '/'.
    return (folder for folder in all_folders
            if folder.startswith(path) and '/' not in folder[len(path) + 1:])
def _find(api_method, query, limit, return_handler, **kwargs): ''' Takes an API method handler (dxpy.api.find...) and calls it with *query*, then wraps a generator around its output. Used by the methods below. ''' num_results = 0 while True: resp = api_method(query, **kwargs) for i in resp["results"]: if num_results == limit: raise StopIteration() num_results += 1 if return_handler: handler = dxpy.get_handler(i['id'], project=i.get('project')) yield handler else: yield i # set up next query if resp["next"] is not None: query["starting"] = resp["next"] else: raise StopIteration()
def _find(api_method, query, limit, return_handler, **kwargs): """ Takes an API method handler (dxpy.api.find...) and calls it with *query*, then wraps a generator around its output. Used by the methods below. """ num_results = 0 while True: resp = api_method(query, **kwargs) for i in resp["results"]: if num_results == limit: raise StopIteration() num_results += 1 if return_handler: handler = dxpy.get_handler(i["id"], project=i.get("project")) yield handler else: yield i # set up next query if resp["next"] is not None: query["starting"] = resp["next"] else: raise StopIteration()
def upload_resources(src_dir, project=None, folder='/', ensure_upload=False, force_symlinks=False):
    """
    :param ensure_upload: If True, will bypass checksum of resources directory
                          and upload resources bundle unconditionally;
                          will NOT be able to reuse this bundle in future builds.
                          Else if False, will compute checksum and upload bundle
                          if checksum is different from a previously uploaded
                          bundle's checksum.
    :type ensure_upload: boolean
    :param force_symlinks: If true, will bypass the attempt to dereference any
                           non-local symlinks and will unconditionally include
                           the link as-is. Note that this will almost certainly
                           result in a broken link within the resource directory
                           unless you really know what you're doing.
    :type force_symlinks: boolean
    :returns: A list (possibly empty) of references to the generated archive(s)
    :rtype: list

    If it exists, archives and uploads the contents of the ``resources/``
    subdirectory of *src_dir* to a new remote file object, and returns a list
    describing a single bundled dependency in the form expected by the
    ``bundledDepends`` field of a run specification. Returns an empty list,
    if no archive was created.
    """
    applet_spec = _get_applet_spec(src_dir)

    if project is None:
        dest_project = applet_spec['project']
    else:
        dest_project = project
        applet_spec['project'] = project

    resources_dir = os.path.join(src_dir, "resources")
    if os.path.exists(resources_dir) and len(os.listdir(resources_dir)) > 0:
        target_folder = applet_spec['folder'] if 'folder' in applet_spec else folder

        # While creating the resource bundle, optimistically look for a
        # resource bundle with the same contents, and reuse it if possible.
        # The resource bundle carries a property 'resource_bundle_checksum'
        # that indicates the checksum; the way in which the checksum is
        # computed is given below. If the checksum matches (and
        # ensure_upload is False), then we will use the existing file,
        # otherwise, we will compress and upload the tarball.

        # The input to the SHA1 contains entries of the form (whitespace
        # only included here for readability):
        #
        # / \0 MODE \0 MTIME \0
        # /foo \0 MODE \0 MTIME \0
        # ...
        #
        # where there is one entry for each directory or file (order is
        # specified below), followed by a numeric representation of the
        # mode, and the mtime in milliseconds since the epoch.
        #
        # Note when looking at a link, if the link is to be dereferenced,
        # the mtime and mode used are that of the target (using os.stat())
        # If the link is to be kept as a link, the mtime and mode are those
        # of the link itself (using os.lstat())

        with tempfile.NamedTemporaryFile(suffix=".tar") as tar_tmp_fh:

            output_sha1 = hashlib.sha1()
            tar_fh = tarfile.open(fileobj=tar_tmp_fh, mode='w')

            for dirname, subdirs, files in os.walk(resources_dir):
                if not dirname.startswith(resources_dir):
                    raise AssertionError('Expected %r to start with root directory %r' %
                                         (dirname, resources_dir))

                # Add an entry for the directory itself
                relative_dirname = dirname[len(resources_dir):]
                dir_stat = os.lstat(dirname)
                if not relative_dirname.startswith('/'):
                    relative_dirname = '/' + relative_dirname

                # Feed "<path>\0<mode>\0<mtime-ms>\0" for the directory into
                # the checksum.
                fields = [relative_dirname,
                          str(_fix_perms(dir_stat.st_mode)),
                          str(int(dir_stat.st_mtime * 1000))]
                output_sha1.update(b''.join(s.encode('utf-8') + b'\0' for s in fields))

                # add an entry in the tar file for the current directory, but
                # do not recurse!
                tar_fh.add(dirname, arcname='.' + relative_dirname,
                           recursive=False, filter=_fix_perm_filter)

                # Canonicalize the order of subdirectories; this is the order in
                # which they will be visited by os.walk
                subdirs.sort()

                # check the subdirectories for symlinks. We should throw an error
                # if there are any links that point outside of the directory (unless
                # --force-symlinks is given). If a link is pointing internal to
                # the directory (or --force-symlinks is given), we should add it
                # as a file.
                for subdir_name in subdirs:
                    dir_path = os.path.join(dirname, subdir_name)

                    # If we do have a symlink,
                    if os.path.islink(dir_path):
                        # Let's get the pointed-to path to ensure that it is
                        # still in the directory
                        link_target = os.readlink(dir_path)

                        # If this is a local link, add it to the list of files (case 1)
                        # else raise an error
                        if force_symlinks or is_link_local(link_target):
                            files.append(subdir_name)
                        else:
                            raise AppBuilderException("Cannot include symlinks to directories outside of the resource directory. '%s' points to directory '%s'" % (dir_path, os.path.realpath(dir_path)))

                # Canonicalize the order of files so that we compute the
                # checksum in a consistent order
                for filename in sorted(files):
                    deref_link = False
                    relative_filename = os.path.join(relative_dirname, filename)
                    true_filename = os.path.join(dirname, filename)

                    file_stat = os.lstat(true_filename)
                    # check for a link here, please!
                    if os.path.islink(true_filename):
                        # Get the pointed-to path
                        link_target = os.readlink(true_filename)

                        if not (force_symlinks or is_link_local(link_target)):
                            # if we are pointing outside of the directory, then:
                            # try to get the true stat of the file and make sure
                            # to dereference the link!
                            try:
                                file_stat = os.stat(os.path.join(dirname, link_target))
                                deref_link = True
                            except OSError:
                                # uh-oh! looks like we have a broken link!
                                # since this is guaranteed to cause problems (and
                                # we know we're not forcing symlinks here), we
                                # should throw an error
                                raise AppBuilderException("Broken symlink: Link '%s' points to '%s', which does not exist" % (true_filename, os.path.realpath(true_filename)))

                    fields = [relative_filename,
                              str(_fix_perms(file_stat.st_mode)),
                              str(int(file_stat.st_mtime * 1000))]
                    output_sha1.update(b''.join(s.encode('utf-8') + b'\0' for s in fields))

                    # If we are to dereference, use the target fn
                    if deref_link:
                        true_filename = os.path.realpath(true_filename)
                    tar_fh.add(true_filename, arcname='.' + relative_filename,
                               filter=_fix_perm_filter)

                # end for filename in sorted(files)
            # end for dirname, subdirs, files in os.walk(resources_dir):

            # at this point, the tar is complete, so close the tar_fh
            tar_fh.close()

            # Optimistically look for a resource bundle with the same
            # contents, and reuse it if possible. The resource bundle
            # carries a property 'resource_bundle_checksum' that indicates
            # the checksum; the way in which the checksum is computed is
            # given in the documentation of _directory_checksum.
            if ensure_upload:
                # Skip checksum matching entirely; this upload will never be
                # reusable by a later build.
                properties_dict = {}
                existing_resources = False
            else:
                directory_checksum = output_sha1.hexdigest()
                properties_dict = dict(resource_bundle_checksum=directory_checksum)
                existing_resources = dxpy.find_one_data_object(
                    project=dest_project,
                    folder=target_folder,
                    properties=dict(resource_bundle_checksum=directory_checksum),
                    visibility='either',
                    zero_ok=True,
                    state='closed',
                    return_handler=True)

            if existing_resources:
                logger.info("Found existing resource bundle that matches local "
                            "resources directory: " + existing_resources.get_id())
                dx_resource_archive = existing_resources
            else:
                logger.debug("Uploading in " + src_dir)

                # We need to compress the tar that we've created
                targz_fh = tempfile.NamedTemporaryFile(suffix=".tar.gz", delete=False)

                # compress the file by reading the tar file and passing
                # it though a GzipFile object, writing the given
                # block size (by default 8192 bytes) at a time
                targz_gzf = gzip.GzipFile(fileobj=targz_fh, mode='wb')
                tar_tmp_fh.seek(0)
                dat = tar_tmp_fh.read(io.DEFAULT_BUFFER_SIZE)
                while dat:
                    targz_gzf.write(dat)
                    dat = tar_tmp_fh.read(io.DEFAULT_BUFFER_SIZE)

                targz_gzf.flush()
                targz_gzf.close()
                targz_fh.close()

                if 'folder' in applet_spec:
                    # Best-effort folder creation; API errors are ignored.
                    try:
                        dxpy.get_handler(dest_project).new_folder(applet_spec['folder'],
                                                                  parents=True)
                    except dxpy.exceptions.DXAPIError:
                        pass  # TODO: make this better

                dx_resource_archive = dxpy.upload_local_file(
                    targz_fh.name,
                    wait_on_close=True,
                    project=dest_project,
                    folder=target_folder,
                    hidden=True,
                    properties=properties_dict)

                os.unlink(targz_fh.name)

                # end compressed file creation and upload

            archive_link = dxpy.dxlink(dx_resource_archive.get_id())

        # end tempfile.NamedTemporaryFile(suffix=".tar") as tar_fh

        return [{'name': 'resources.tar.gz', 'id': archive_link}]
    else:
        return []
def upload_resources(src_dir, project=None, folder='/'):
    """
    :returns: A list (possibly empty) of references to the generated archive(s)
    :rtype: list

    If it exists, archives and uploads the contents of the ``resources/``
    subdirectory of *src_dir* to a new remote file object, and returns a list
    describing a single bundled dependency in the form expected by the
    ``bundledDepends`` field of a run specification. Returns an empty list,
    if no archive was created.
    """
    applet_spec = _get_applet_spec(src_dir)

    if project is None:
        dest_project = applet_spec['project']
    else:
        dest_project = project
        applet_spec['project'] = project

    resources_dir = os.path.join(src_dir, "resources")
    if os.path.exists(resources_dir) and len(os.listdir(resources_dir)) > 0:
        target_folder = applet_spec['folder'] if 'folder' in applet_spec else folder

        # Optimistically look for a resource bundle with the same
        # contents, and reuse it if possible. The resource bundle
        # carries a property 'resource_bundle_checksum' that indicates
        # the checksum; the way in which the checksum is computed is
        # given in the documentation of _directory_checksum.
        directory_checksum = _directory_checksum(resources_dir)
        existing_resources = dxpy.find_one_data_object(
            project=dest_project,
            folder=target_folder,
            properties=dict(resource_bundle_checksum=directory_checksum),
            visibility='either',
            zero_ok=True,
            state='closed',
            return_handler=True)
        if existing_resources:
            logger.info("Found existing resource bundle that matches local "
                        "resources directory: " + existing_resources.get_id())
            dx_resource_archive = existing_resources
        else:
            logger.debug("Uploading in " + src_dir)
            with tempfile.NamedTemporaryFile(suffix=".tar.gz") as tar_fh:
                # The directory contents may have changed since the
                # first time we checksummed the directory. Ideally we
                # would extract the tar file to determine the checksum
                # of the actually archived files, but maybe this is a
                # little too paranoid.
                subprocess.check_call(['tar', '-C', resources_dir, '-czf', tar_fh.name, '.'])
                if 'folder' in applet_spec:
                    # Best-effort folder creation; API errors are ignored.
                    try:
                        dxpy.get_handler(dest_project).new_folder(applet_spec['folder'],
                                                                  parents=True)
                    except dxpy.exceptions.DXAPIError:
                        pass  # TODO: make this better
                dx_resource_archive = dxpy.upload_local_file(
                    tar_fh.name,
                    wait_on_close=True,
                    project=dest_project,
                    folder=target_folder,
                    hidden=True,
                    properties=dict(resource_bundle_checksum=directory_checksum))
        archive_link = dxpy.dxlink(dx_resource_archive.get_id())
        return [{'name': 'resources.tar.gz', 'id': archive_link}]
    else:
        return []
def path_completer(text, expected=None, classes=None, perm_level=None,
                   include_current_proj=False, typespec=None, visibility=None):
    '''
    :param text: String to tab-complete to a path matching the syntax project-name:folder/entity_or_folder_name
    :type text: string
    :param expected: "folder", "entity", "project", or None (no restriction) as to the types of answers to look for
    :type expected: string
    :param classes: if expected="entity", the possible data object classes that are acceptable
    :type classes: list of strings
    :param perm_level: the minimum permissions level required, e.g. "VIEW" or "CONTRIBUTE"
    :type perm_level: string
    :param include_current_proj: Indicate whether the current project's name should be a potential result
    :type include_current_proj: boolean
    :param visibility: Visibility with which to restrict the completion (one of "either", "visible", or "hidden") (default behavior is dependent on *text*)

    Returns a list of matches to the text and restricted by the requested
    parameters.
    '''
    colon_pos = get_last_pos_of_char(':', text)
    slash_pos = get_last_pos_of_char('/', text)
    delim_pos = max(colon_pos, slash_pos)

    # First get projects if necessary
    matches = []
    if expected == 'project' and colon_pos > 0 and colon_pos == len(text) - 1:
        # Text ends in ':' -- complete with a trailing space if that project
        # name exists.
        if dxpy.find_one_project(zero_ok=True, name=text[:colon_pos]) is not None:
            return [text + " "]

    if colon_pos < 0 and slash_pos < 0:
        # Might be tab-completing a project, but don't ever include
        # whatever's set as dxpy.WORKSPACE_ID unless expected == "project"
        # Also, don't bother if text=="" and expected is NOT "project"
        # Also, add space if expected == "project"
        if text != "" or expected == 'project':
            results = dxpy.find_projects(describe=True, level=perm_level)
            if not include_current_proj:
                results = [r for r in results if r['id'] != dxpy.WORKSPACE_ID]
            matches += [escape_colon(r['describe']['name'])+':' for r in results
                        if r['describe']['name'].startswith(text)]

    if expected == 'project':
        return matches

    # Attempt to tab-complete to a folder or data object name
    if colon_pos < 0 and slash_pos >= 0:
        # Not tab-completing a project, and the project is unambiguous
        # (use dxpy.WORKSPACE_ID)
        if dxpy.WORKSPACE_ID is not None:
            # try-catch block in case dxpy.WORKSPACE_ID is garbage
            try:
                dxproj = dxpy.get_handler(dxpy.WORKSPACE_ID)
                folderpath, entity_name = clean_folder_path(text)
                matches += get_folder_matches(text, slash_pos, dxproj, folderpath)
                if expected != 'folder':
                    if classes is not None:
                        for classname in classes:
                            matches += get_data_matches(text, slash_pos, dxproj,
                                                        folderpath, classname=classname,
                                                        typespec=typespec,
                                                        visibility=visibility)
                    else:
                        matches += get_data_matches(text, slash_pos, dxproj,
                                                    folderpath, typespec=typespec,
                                                    visibility=visibility)
            except:
                pass
    else:
        # project is given by a path, but attempt to resolve to an
        # object or folder anyway
        try:
            proj_ids, folderpath, entity_name = resolve_path(text, multi_projects=True)
        except ResolutionError as details:
            sys.stderr.write("\n" + fill(unicode(details)))
            return matches
        for proj in proj_ids:
            # protects against dxpy.WORKSPACE_ID being garbage
            try:
                dxproj = dxpy.get_handler(proj)
                matches += get_folder_matches(text, delim_pos, dxproj, folderpath)
                if expected != 'folder':
                    if classes is not None:
                        for classname in classes:
                            matches += get_data_matches(text, delim_pos, dxproj,
                                                        folderpath, classname=classname,
                                                        typespec=typespec,
                                                        visibility=visibility)
                    else:
                        matches += get_data_matches(text, delim_pos, dxproj,
                                                    folderpath, typespec=typespec,
                                                    visibility=visibility)
            except:
                pass

    return matches
def test_sub_jobs(self):
    '''Tests a bash script that generates sub-jobs'''
    with temporary_project('TestDXBashHelpers.test_app1 temporary project') as dxproj:
        env = update_environ(DX_PROJECT_CONTEXT_ID=dxproj.get_id())

        # Upload some files for use by the applet
        dxpy.upload_string("1234\n", project=dxproj.get_id(), name="A.txt")
        dxpy.upload_string("ABCD\n", project=dxproj.get_id(), name="B.txt")

        # Build the applet, patching in the bash helpers from the
        # local checkout
        applet_id = build_app_with_bash_helpers(os.path.join(TEST_APPS, 'with-subjobs'),
                                                dxproj.get_id())

        # Run the applet.
        # Since the job creates two sub-jobs, we need to be a bit more sophisticated
        # in order to wait for completion.
        applet_args = ["-ifiles=A.txt", "-ifiles=B.txt"]
        cmd_args = ['dx', 'run', '--yes', '--brief', applet_id]
        cmd_args.extend(applet_args)
        job_id = run(cmd_args, env=env).strip()

        dxpy.DXJob(job_id).wait_on_done()

        # Assertions -- making sure the script worked
        # Assertions to make about the job's output after it is done running:
        # - *first_file* is a file named first_file.txt containing the string:
        #     "contents of first_file"
        # - *final_file* is a file named final_file.txt containing the
        #   concatenation of the two input files in *files*
        print("Test completed successfully, checking file content\n")

        job_handler = dxpy.get_handler(job_id)
        job_output = job_handler.output

        def strip_white_space(_str):
            # Compare file contents ignoring all whitespace.
            return ''.join(_str.split())

        def silent_file_remove(filename):
            # Remove a local scratch file, ignoring "does not exist".
            try:
                os.remove(filename)
            except OSError:
                pass

        # The output should include two files, this section verifies that they have
        # the correct data.
        def check_file_content(out_param_name, out_filename, tmp_fname, str_content):
            """
            Download a file, read it from local disk, and verify that it has the
            correct contents
            """
            if out_param_name not in job_output:
                # BUG FIX: the original executed `raise "<string>"`, which is a
                # TypeError on Python 3 (exceptions must derive from
                # BaseException); wrap the same message in an Exception.
                raise Exception("Error: key {} does not appear in the job output".format(out_param_name))
            dxlink = job_output[out_param_name]

            # check that the filename gets preserved
            trg_fname = dxpy.get_handler(dxlink).name
            self.assertEqual(trg_fname, out_filename)

            # download the file and check the contents
            silent_file_remove(tmp_fname)
            dxpy.download_dxfile(dxlink, tmp_fname)
            with open(tmp_fname, "r") as fh:
                data = fh.read()
                print(data)
                if not (strip_white_space(data) == strip_white_space(str_content)):
                    raise Exception("contents of file {} do not match".format(out_param_name))
            silent_file_remove(tmp_fname)

        check_file_content('first_file', 'first_file.txt', "f1.txt", "contents of first_file")
        check_file_content('final_file', 'final_file.txt', "f2.txt", "1234ABCD")
def interactive_help(in_class, param_desc, prompt):
    """Interactively prompt the user for a value for one input parameter.

    :param in_class: the parameter's class (e.g. 'file', 'string', 'int',
        possibly 'array:'-prefixed)
    :param param_desc: the parameter's input-spec description dict
    :param prompt: the prompt string shown when asking for direct entry
    :returns: a list of one or more string values for the parameter
    """
    is_array = param_desc['class'].startswith("array:")
    print_param_help(param_desc)
    print()
    array_help_str = ', or <ENTER> to finish the list of inputs'
    if in_class in dx_data_classes:
        # Class is some sort of data object
        if dxpy.WORKSPACE_ID is not None:
            proj_name = None
            try:
                proj_name = dxpy.api.project_describe(dxpy.WORKSPACE_ID)['name']
            except Exception:
                # Best effort only: if the describe fails we simply skip
                # printing the working directory.
                pass
            if proj_name is not None:
                print('Your current working directory is ' + proj_name + ':' + dxpy.config.get('DX_CLI_WD', '/'))
        while True:
            print('Pick an option to find input data:')
            try:
                opt_num = pick(['List and choose from available data in the current project',
                                'List and choose from available data in the DNAnexus Reference Genomes project',
                                'Select another project to list and choose available data',
                                'Select an output from a previously-run job (current project only)',
                                'Return to original prompt (specify an ID or path directly)'])
            except KeyboardInterrupt:
                # ^C falls through to the "original prompt" option.
                opt_num = 4
            if opt_num == 0:
                query_project = dxpy.WORKSPACE_ID
            elif opt_num == 1:
                query_project = dxpy.find_one_project(name="Reference Genome Files", public=True,
                                                      billed_to="org-dnanexus", level="VIEW")['id']
            elif opt_num == 2:
                project_generator = dxpy.find_projects(level='VIEW', describe=True, explicit_perms=True)
                print('\nProjects to choose from:')
                query_project = paginate_and_pick(project_generator,
                                                  (lambda result: result['describe']['name']))['id']
            if opt_num in range(3):
                # Options 0-2 all narrow down to picking a data object in
                # query_project.
                result_generator = dxpy.find_data_objects(classname=in_class,
                                                          typename=param_desc.get('type'),
                                                          describe=True,
                                                          project=query_project)
                print('\nAvailable data:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_ls_l_desc(result['describe'])))
                if result_choice == 'none found':
                    print('No compatible data found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    return [result_choice['project'] + ':' + result_choice['id']]
            elif opt_num == 3:
                # Select from previous jobs in current project
                result_generator = dxpy.find_jobs(project=dxpy.WORKSPACE_ID,
                                                  describe=True,
                                                  parent_job="none")
                print()
                print('Previously-run jobs to choose from:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_find_executions_string(result['describe'],
                                                                                             has_children=False,
                                                                                             single_result=True)),
                                                  filter_fn=(lambda result: result['describe']['state'] not in ['unresponsive', 'terminating', 'terminated', 'failed']))
                if result_choice == 'none found':
                    print('No jobs found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    if 'output' in result_choice['describe'] and result_choice['describe']['output'] is not None:
                        # BUGFIX: dict.keys() views are not indexable in
                        # Python 3; materialize as a list so keys[i] works.
                        keys = list(result_choice['describe']['output'].keys())
                    else:
                        exec_handler = dxpy.get_handler(result_choice.get('app', result_choice['applet']))
                        exec_desc = exec_handler.describe()
                        if 'outputSpec' not in exec_desc:
                            # This if block will either continue, return, or raise
                            print('No output spec found for the executable')
                            try:
                                field = input('Output field to use (^C or <ENTER> to cancel): ')
                                if field == '':
                                    continue
                                else:
                                    return [result_choice['id'] + ':' + field]
                            except KeyboardInterrupt:
                                continue
                        else:
                            keys = list(exec_desc['outputSpec'].keys())
                    if len(keys) > 1:
                        print('\nOutput fields to choose from:')
                        field_choice = pick(keys)
                        return [result_choice['id'] + ':' + keys[field_choice]]
                    elif len(keys) == 1:
                        print('Using the only output field: ' + keys[0])
                        return [result_choice['id'] + ':' + keys[0]]
                    else:
                        print('No available output fields')
            else:
                # Option 4: let the user type an ID or path directly.
                print(fill('Enter an ID or path (<TAB> twice for compatible ' + in_class + 's in current directory)' + (array_help_str if is_array else '')))
                return shlex.split(input(prompt))
    else:
        # Non-data-object classes: print a class-specific hint, then read
        # the value(s) from the prompt.
        if in_class == 'boolean':
            if is_array:
                print(fill('Enter "true", "false"' + array_help_str))
            else:
                print(fill('Enter "true" or "false"'))
        elif in_class == 'string' and is_array:
            print(fill('Enter a nonempty string' + array_help_str))
        elif (in_class == 'float' or in_class == 'int') and is_array:
            print(fill('Enter a number' + array_help_str))
        elif in_class == 'hash':
            print(fill('Enter a quoted JSON hash'))
        result = input(prompt)
        if in_class == 'string':
            return [result]
        else:
            return shlex.split(result)
def interactive_help(in_class, param_desc, prompt):
    """Interactively prompt the user for a value for one input parameter.

    :param in_class: the parameter's class (e.g. 'file', 'string', 'int',
        possibly 'array:'-prefixed)
    :param param_desc: the parameter's input-spec description dict
    :param prompt: the prompt string shown when asking for direct entry
    :returns: a list of one or more string values for the parameter
    """
    is_array = param_desc['class'].startswith("array:")
    print_param_help(param_desc)
    print()
    array_help_str = ', or <ENTER> to finish the list of inputs'
    if in_class in dx_data_classes:
        # Class is some sort of data object
        if dxpy.WORKSPACE_ID is not None:
            proj_name = None
            try:
                proj_name = dxpy.api.project_describe(dxpy.WORKSPACE_ID)['name']
            except Exception:
                # Best effort only: if the describe fails we simply skip
                # printing the working directory.
                pass
            if proj_name is not None:
                print('Your current working directory is ' + proj_name + ':' + dxpy.config.get('DX_CLI_WD', '/'))
        while True:
            print('Pick an option to find input data:')
            try:
                opt_num = pick(['List and choose from available data in the current project',
                                'List and choose from available data in the DNAnexus Reference Genomes Files project',
                                'Select another project to list and choose available data',
                                'Select an output from a previously-run job (current project only)',
                                'Return to original prompt (specify an ID or path directly)'])
            except KeyboardInterrupt:
                # ^C falls through to the "original prompt" option.
                opt_num = 4
            if opt_num == 0:
                query_project = dxpy.WORKSPACE_ID
            elif opt_num == 1:
                # Match the reference-genomes project in the same region as
                # the current workspace, when one is available.
                region = None
                if dxpy.WORKSPACE_ID:
                    region = dxpy.describe(dxpy.WORKSPACE_ID).get("region")
                query_project = dxpy.find_one_project(name="Reference Genome Files:*", public=True,
                                                      billed_to="org-dnanexus_apps", level="VIEW",
                                                      name_mode="glob", region=region)['id']
            elif opt_num == 2:
                project_generator = dxpy.find_projects(level='VIEW', describe=True, explicit_perms=True)
                print('\nProjects to choose from:')
                query_project = paginate_and_pick(project_generator,
                                                  (lambda result: result['describe']['name']))['id']
            if opt_num in range(3):
                # Options 0-2 all narrow down to picking a data object in
                # query_project.
                result_generator = dxpy.find_data_objects(classname=in_class,
                                                          typename=param_desc.get('type'),
                                                          describe=dict(fields=get_ls_l_desc_fields()),
                                                          project=query_project)
                print('\nAvailable data:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_ls_l_desc(result['describe'])))
                if result_choice == 'none found':
                    print('No compatible data found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    return [result_choice['project'] + ':' + result_choice['id']]
            elif opt_num == 3:
                # Select from previous jobs in current project
                result_generator = dxpy.find_jobs(project=dxpy.WORKSPACE_ID,
                                                  describe=True,
                                                  parent_job="none")
                print()
                print('Previously-run jobs to choose from:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_find_executions_string(result['describe'],
                                                                                             has_children=False,
                                                                                             single_result=True)),
                                                  filter_fn=(lambda result: result['describe']['state'] not in ['unresponsive', 'terminating', 'terminated', 'failed']))
                if result_choice == 'none found':
                    print('No jobs found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    if 'output' in result_choice['describe'] and result_choice['describe']['output'] is not None:
                        # BUGFIX: dict.keys() views are not indexable in
                        # Python 3; materialize as a list so keys[i] works.
                        keys = list(result_choice['describe']['output'].keys())
                    else:
                        exec_handler = dxpy.get_handler(result_choice.get('app', result_choice['applet']))
                        exec_desc = exec_handler.describe()
                        if 'outputSpec' not in exec_desc:
                            # This if block will either continue, return, or raise
                            print('No output spec found for the executable')
                            try:
                                field = input('Output field to use (^C or <ENTER> to cancel): ')
                                if field == '':
                                    continue
                                else:
                                    return [result_choice['id'] + ':' + field]
                            except KeyboardInterrupt:
                                continue
                        else:
                            keys = list(exec_desc['outputSpec'].keys())
                    if len(keys) > 1:
                        print('\nOutput fields to choose from:')
                        field_choice = pick(keys)
                        return [result_choice['id'] + ':' + keys[field_choice]]
                    elif len(keys) == 1:
                        print('Using the only output field: ' + keys[0])
                        return [result_choice['id'] + ':' + keys[0]]
                    else:
                        print('No available output fields')
            else:
                # Option 4: let the user type an ID or path directly.
                print(fill('Enter an ID or path (<TAB> twice for compatible ' + in_class + 's in current directory)' + (array_help_str if is_array else '')))
                return shlex.split(input(prompt))
    else:
        # Non-data-object classes: print a class-specific hint, then read
        # the value(s) from the prompt.
        if in_class == 'boolean':
            if is_array:
                print(fill('Enter "true", "false"' + array_help_str))
            else:
                print(fill('Enter "true" or "false"'))
        elif in_class == 'string' and is_array:
            print(fill('Enter a nonempty string' + array_help_str))
        elif (in_class == 'float' or in_class == 'int') and is_array:
            print(fill('Enter a number' + array_help_str))
        elif in_class == 'hash':
            print(fill('Enter a quoted JSON hash'))
        result = input(prompt)
        if in_class == 'string':
            return [result]
        else:
            return shlex.split(result)
def main():
    """Incrementally upload a sequencing run directory to DNAnexus.

    Creates (or reuses) an UploadSentinel record per lane, uploads
    RunInfo.xml / SampleSheet.csv, syncs the run directory until the
    run-termination file appears (or the allowed time expires), then
    finalizes each lane's record and optionally launches a downstream
    applet/workflow or local script.
    NOTE(review): exits with status 1 both on timeout and when all lanes
    are already uploaded — callers treating exit code as failure should
    be aware of the latter case.
    """
    args = parse_args()
    check_input(args)
    run_id = get_run_id(args.run_dir)

    # Set all naming conventions
    REMOTE_RUN_FOLDER = "/" + run_id + "/runs"
    REMOTE_READS_FOLDER = "/" + run_id + "/reads"
    REMOTE_ANALYSIS_FOLDER = "/" + run_id + "/analyses"

    FILE_PREFIX = "run." + run_id + ".lane."

    # Prep log & record names
    lane_info = []
    # If no lanes are specified, set lane to all, otherwise, set to array of lanes
    if not args.num_lanes:
        lanes_to_upload = ["all"]
    else:
        lanes_to_upload = [str(i) for i in range(1, args.num_lanes + 1)]

    for lane in lanes_to_upload:
        lane_prefix = FILE_PREFIX + lane
        lane_info.append({
            "lane": lane,
            "prefix": lane_prefix,
            "log_path": os.path.join(args.log_dir, lane_prefix + ".log"),
            "record_name": lane_prefix + ".upload_sentinel",
            "remote_folder": get_target_folder(REMOTE_RUN_FOLDER, lane),
            "uploaded": False
        })

    # Create upload sentinel for upload, if record already exists, use that
    done_count = 0
    for lane in lane_info:
        lane_num = lane["lane"]
        try:
            old_record = dxpy.find_one_data_object(
                zero_ok=True, typename="UploadSentinel",
                name=lane["record_name"], project=args.project,
                folder=lane["remote_folder"])
        except dxpy.exceptions.DXSearchError as e:
            raise_error(
                "Encountered an error looking for %s at %s:%s. %s" %
                (lane["record_name"], lane["remote_folder"], args.project, e))

        if old_record:
            lane["dxrecord"] = dxpy.get_handler(old_record["id"],
                                                project=old_record["project"])
            # A closed sentinel record means the lane finished uploading in
            # a previous invocation; skip it.
            if lane["dxrecord"].describe()["state"] == "closed":
                print_stderr("Run %s, lane %s has already been uploaded" %
                             (run_id, lane_num))
                lane["uploaded"] = True
                done_count += 1
        else:
            properties = {"run_id": run_id, "lanes": lane_num}
            lane["dxrecord"] = dxpy.new_dxrecord(types=["UploadSentinel"],
                                                 project=args.project,
                                                 folder=lane["remote_folder"],
                                                 parents=True,
                                                 name=lane["record_name"],
                                                 properties=properties)

        # upload RunInfo here, before uploading any data, unless it is already uploaded.
        record = lane["dxrecord"]
        properties = record.get_properties()
        runInfo = dxpy.find_one_data_object(zero_ok=True, name="RunInfo.xml",
                                            project=args.project,
                                            folder=lane["remote_folder"])
        if not runInfo:
            lane["runinfo_file_id"] = upload_single_file(
                args.run_dir + "/RunInfo.xml", args.project,
                lane["remote_folder"], properties)
        else:
            lane["runinfo_file_id"] = runInfo["id"]

        # Upload samplesheet unless samplesheet-delay is specified or it is already uploaded.
        if not args.samplesheet_delay:
            sampleSheet = dxpy.find_one_data_object(
                zero_ok=True, name="SampleSheet.csv", project=args.project,
                folder=lane["remote_folder"])
            if not sampleSheet:
                lane["samplesheet_file_id"] = upload_single_file(
                    args.run_dir + "/SampleSheet.csv", args.project,
                    lane["remote_folder"], properties)
            else:
                lane["samplesheet_file_id"] = sampleSheet["id"]

    if done_count == len(lane_info):
        print_stderr("EXITING: All lanes already uploaded")
        sys.exit(1)

    # normalize_timedelta returns milliseconds; convert to seconds and
    # scale by the number of intervals we are willing to wait.
    seconds_to_wait = (dxpy.utils.normalize_timedelta(args.run_duration) /
                       1000 * args.intervals_to_wait)
    print_stderr("Maximum allowable time for run to complete: %d seconds." %
                 seconds_to_wait)

    initial_start_time = time.time()
    # While loop waiting for RTAComplete.txt or RTAComplete.xml
    while not termination_file_exists(args.novaseq, args.run_dir):
        start_time = time.time()
        run_time = start_time - initial_start_time
        # Fail if run time exceeds total time to wait
        if run_time > seconds_to_wait:
            print_stderr(
                "EXITING: Upload failed. Run did not complete after %d seconds (max wait = %ds)" %
                (run_time, seconds_to_wait))
            sys.exit(1)

        # Loop through all lanes in run directory
        for lane in lane_info:
            lane_num = lane["lane"]
            if lane["uploaded"]:
                continue
            run_sync_dir(lane, args)

        # Wait at least the minimum time interval before running the loop again
        cur_time = time.time()
        diff = cur_time - start_time
        if diff < args.sync_interval:
            print_stderr("Sleeping for %d seconds" %
                         (int(args.sync_interval - diff)))
            time.sleep(int(args.sync_interval - diff))

    # Final synchronization, upload data, set details
    for lane in lane_info:
        if lane["uploaded"]:
            continue
        file_ids = run_sync_dir(lane, args, finish=True)
        record = lane["dxrecord"]
        properties = record.get_properties()
        lane["log_file_id"] = upload_single_file(lane["log_path"],
                                                 args.project,
                                                 lane["remote_folder"],
                                                 properties)

        # Tag every uploaded tarball with the sentinel's properties.
        for file_id in file_ids:
            dxpy.get_handler(file_id,
                             project=args.project).set_properties(properties)
        details = {
            'run_id': run_id,
            'lanes': lane["lane"],
            'upload_thumbnails': str(args.upload_thumbnails).lower(),
            'dnanexus_path': args.project + ":" + lane["remote_folder"],
            'tar_file_ids': file_ids
        }

        # Upload sample sheet here, if samplesheet-delay specified
        if args.samplesheet_delay:
            lane["samplesheet_file_id"] = upload_single_file(
                args.run_dir + "/SampleSheet.csv", args.project,
                lane["remote_folder"], properties)

        # ID to singly uploaded file (when uploaded successfully)
        if lane.get("log_file_id"):
            details.update({'log_file_id': lane["log_file_id"]})
        if lane.get("runinfo_file_id"):
            details.update({'runinfo_file_id': lane["runinfo_file_id"]})
        if lane.get("samplesheet_file_id"):
            details.update(
                {'samplesheet_file_id': lane["samplesheet_file_id"]})

        # Closing the sentinel record marks the lane upload complete.
        record.set_details(details)
        record.close()

    print_stderr("Run %s successfully streamed!" % (run_id))

    # Parse and validate optional JSON input to forward to the downstream
    # applet/workflow.
    downstream_input = {}
    if args.downstream_input:
        try:
            input_dict = json.loads(args.downstream_input)
        except ValueError as e:
            raise_error(
                "Failed to read downstream input as JSON string. %s. %s" %
                (args.downstream_input, e))
        if not isinstance(input_dict, dict):
            raise_error("Expected a dict for downstream input. Got %s." %
                        input_dict)

        for k, v in list(input_dict.items()):
            if not (isinstance(k, str) and
                    (isinstance(v, str) or isinstance(v, dict))):
                raise_error(
                    "Expected (string) key and (string or dict) value pairs for downstream input. Got (%s)%s (%s)%s" %
                    (type(k), k, type(v), v))
            downstream_input[k] = v

    if args.applet:
        # project verified in check_input, assuming no change
        project = dxpy.get_handler(args.project)
        print_stderr("Initiating downstream analysis: given app(let) id %s" %
                     args.applet)

        for info in lane_info:
            lane = info["lane"]
            record = info["dxrecord"]

            # applet verified in check_input, assume no change
            applet = dxpy.get_handler(args.applet)

            # Prepare output folder, if downstream analysis specified
            reads_target_folder = get_target_folder(REMOTE_READS_FOLDER, lane)
            print_stderr("Creating output folder %s" % (reads_target_folder))

            try:
                project.new_folder(reads_target_folder, parents=True)
            except dxpy.DXError as e:
                raise_error("Failed to create new folder %s. %s" %
                            (reads_target_folder, e))

            # Decide on job name (<executable>-<run_id>)
            job_name = applet.title + "-" + run_id

            # Overwite upload_sentinel_record input of applet to the record of inc upload
            downstream_input["upload_sentinel_record"] = dxpy.dxlink(record)

            # Run specified applet
            job = applet.run(downstream_input, folder=reads_target_folder,
                             project=args.project, name=job_name)
            print_stderr("Initiated job %s from applet %s for lane %s" %
                         (job, args.applet, lane))
    # Close if args.applet

    # args.workflow and args.applet are mutually exclusive
    elif args.workflow:
        # project verified in check_input, assuming no change
        project = dxpy.get_handler(args.project)
        print_stderr("Initiating downstream analysis: given workflow id %s" %
                     args.workflow)

        for info in lane_info:
            lane = info["lane"]
            record = info["dxrecord"]

            # workflow verified in check_input, assume no change
            workflow = dxpy.get_handler(args.workflow)

            # Prepare output folder, if downstream analysis specified
            analyses_target_folder = get_target_folder(REMOTE_ANALYSIS_FOLDER, lane)
            print_stderr("Creating output folder %s" % (analyses_target_folder))

            try:
                project.new_folder(analyses_target_folder, parents=True)
            except dxpy.DXError as e:
                raise_error("Failed to create new folder %s. %s" %
                            (analyses_target_folder, e))

            # Decide on job name (<executable>-<run_id>)
            job_name = workflow.title + "-" + run_id

            # Overwite upload_sentinel_record input of applet to the record of inc upload
            downstream_input["0.upload_sentinel_record"] = dxpy.dxlink(record)

            # Run specified applet
            job = workflow.run(downstream_input, folder=analyses_target_folder,
                               project=args.project, name=job_name)
            print_stderr("Initiated analyses %s from workflow %s for lane %s" %
                         (job, args.workflow, lane))
    # Close if args.workflow

    if args.script:
        # script has been validated to be executable earlier, assume no change
        try:
            sub.check_call([args.script, args.run_dir])
        except sub.CalledProcessError as e:
            raise_error("Executable (%s) failed with error %d: %s" %
                        (args.script, e.returncode, e.output))
def check_input(args):
    """Validate CLI arguments and the execution environment.

    Verifies the API token and project are usable, resolves any chained
    downstream applet/workflow, checks that a local script (if given) is
    executable, and confirms the required helper executables are reachable.
    Calls raise_error (which is expected to abort) on any failure.
    """
    dxpy.set_security_context({
        "auth_token_type": "Bearer",
        "auth_token": args.api_token})

    # Check API token and project context
    try:
        dxpy.get_handler(args.project).describe()
    except dxpy.exceptions.DXAPIError as e:
        if e.name == "InvalidAuthentication":
            raise_error("API token (%s) is not valid. %s" %
                        (args.api_token, e))
        if e.name == "PermissionDenied":
            raise_error("Project (%s) is not valid. %s" %
                        (args.project, e))
    except dxpy.exceptions.DXError as e:
        raise_error("Error getting project handler for project (%s). %s" %
                    (args.project, e))

    # Check that chained downstream applet is valid
    if args.applet:
        try:
            dxpy.get_handler(args.applet).describe()
        except dxpy.exceptions.DXAPIError as e:
            raise_error("Unable to resolve applet %s. %s" % (args.applet, e))
        except dxpy.exceptions.DXError as e:
            raise_error("Error getting handler for applet (%s). %s" %
                        (args.applet, e))

    # Check that chained downstream workflow is valid
    if args.workflow:
        try:
            dxpy.get_handler(args.workflow).describe()
        except dxpy.exceptions.DXAPIError as e:
            raise_error("Unable to resolve workflow %s. %s" %
                        (args.workflow, e))
        except dxpy.exceptions.DXError as e:
            raise_error("Error getting handler for workflow (%s). %s" %
                        (args.workflow, e))

    # Check that executable to launch locally is executable
    if args.script:
        if not (os.path.isfile(args.script) and
                os.access(args.script, os.X_OK)):
            raise_error(
                "Executable/script passed by -s: (%s) is not executable" %
                (args.script))

    if not args.dxpy_upload:
        print_stderr("Checking if ua is in $PATH")
        try:
            # BUGFIX: use subprocess's DEVNULL instead of open(os.devnull)
            # so no file handle is leaked.
            sub.check_call(['ua', '--version'],
                           stdout=sub.DEVNULL, close_fds=True)
        except sub.CalledProcessError:
            raise_error(
                "Upload agent executable 'ua' was not found in the $PATH")

    try:
        # We assume that dx_sync_directory is located in the same folder as this script
        # This is resolved by absolute path of invocation
        sub.check_call([
            'python3',
            '{curr_dir}/dx_sync_directory.py'.format(curr_dir=sys.path[0]),
            '-h'
        ], stdout=sub.DEVNULL, close_fds=True)
    except sub.CalledProcessError:
        raise_error(
            "dx_sync_directory.py not found. Please run incremental " +
            "upload from the directory containing incremental_upload.py " +
            "and dx_sync_directory.py")