Example #1
            def check_file_content(out_param_name, out_filename, tmp_fname,
                                   str_content):
                """
                Download a file, read it from local disk, and verify that it has the correct contents
                """
                if out_param_name not in job_output:
                    raise Exception(
                        "Error: key {} does not appear in the job output".format(
                            out_param_name))
                dxlink = job_output[out_param_name]

                # check that the filename gets preserved
                trg_fname = dxpy.get_handler(dxlink).name
                self.assertEqual(trg_fname, out_filename)

                # download the file and check the contents
                silent_file_remove(tmp_fname)
                dxpy.download_dxfile(dxlink, tmp_fname)
                with open(tmp_fname, "r") as fh:
                    data = fh.read()
                    print(data)
                    if not (strip_white_space(data)
                            == strip_white_space(str_content)):
                        raise Exception(
                            "contents of file {} do not match".format(
                                out_param_name))
                silent_file_remove(tmp_fname)
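For context, a minimal sketch of how this helper might be invoked from the surrounding test; the parameter name, filename, and contents below are placeholders:

# Hypothetical invocation (names and contents are placeholders):
check_file_content("out_file", "result.txt", "/tmp/result.txt", "expected contents")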
Example #2
def copy_files(fids, projectId, folder, overwrite=False):
    '''Copies array of dx file dicts to project:/folder, returning new array of dx file dicts.'''
    newFids = []
    for fid in fids:
        fileDict = dxpy.describe(FILES[fid])  # FILES contain dxLinks
        if fileDict['project'] == projectId:
            # cannot copy into the same project!!!
            # so just leave in place and pretend that we did!
            #proj = dxpy.DXProject(projectId)
            #proj.move(folder,[fid])
            newFids.append(fid)
            continue

        # Check to see if file already exists.
        alreadyThere = find_file(folder + '/' + fileDict['name'], projectId)
        if alreadyThere is None or overwrite:
            # remove what is alreadyThere?
            #if alreadyThere is not None:
            #    proj = dxpy.DXProject(projectId)
            #    proj.remove_objects([alreadyThere])
            dxFile = dxpy.get_handler(FILES[fid])
            newLink = dxpy.dxlink(dxFile.clone(projectId, folder))
        else:
            newLink = FILES[alreadyThere]
        if newLink is None:
            print "ERROR: Failed in copy of '" + fileDict['project'] + ":" + fileDict['name'] + \
                    "' to '" + projectId + ":" + folder + "'."
            sys.exit(1)
        newDict = dxpy.describe(newLink)
        FILES[newDict['id']] = newLink
        newFids.append(newDict['id'])

    return newFids
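A hedged usage sketch; the IDs and folder below are placeholders, and FILES is assumed to already map fids to dxlinks:

# Hypothetical usage (IDs and folder are placeholders):
new_fids = copy_files(["file-xxxx"], "project-yyyy", "/results", overwrite=True)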
Example #3
 def format_result(result):
     if return_handler:
         result = dxpy.get_handler(result['id'], project=result.get('project'))
     if by_parent is not None:
         return result, by_parent, descriptions
     else:
         return result
Example #4
def list_subfolders(project, path, recurse=True):
    '''
    :param project: Project ID to use as context for the listing
    :type project: string
    :param path: Subtree root path
    :type path: string
    :param recurse: Return a complete subfolders tree
    :type recurse: boolean

    Returns a generator of the subfolders of the remote *path* (the *path* itself is included in the result) within *project*.

    Example::

        list_subfolders("project-xxxx", folder="/input")

    '''
    project_folders = dxpy.get_handler(project).describe(
        input_params={'folders': True})['folders']
    # TODO: support shell-style path globbing (i.e. /a*/c matches /ab/c but not /a/b/c)
    # return pathmatch.filter(project_folders, os.path.join(path, '*'))
    if recurse:
        return (f for f in project_folders if f.startswith(path))
    else:
        return (f for f in project_folders
                if f.startswith(path) and '/' not in f[len(path) + 1:])
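A short sketch of consuming the returned generator; the project ID is a placeholder:

# Hypothetical usage: print only the immediate subfolders of /input
for folder in list_subfolders("project-xxxx", "/input", recurse=False):
    print(folder)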
Example #5
def file_handler_from_fid(fid):
    '''Returns dx file handler from fid.'''
    try:
        dxlink = FILES[fid]
    except:
        dxlink = dxpy.dxlink(fid)
    return dxpy.get_handler(dxlink)
Example #6
File: dx.py Project: ENCODE-DCC/dxencode
def file_handler_from_fid(fid):
    '''Returns dx file handler from fid.'''
    try:
        dxlink = FILES[fid]
    except:
        dxlink = dxpy.dxlink(fid)
    return dxpy.get_handler(dxlink)
Example #7
File: dx.py Project: ENCODE-DCC/dxencode
def copy_files(fids, projectId, folder, overwrite=False):
    '''Copies array of dx file dicts to project:/folder, returning new array of dx file dicts.'''
    newFids = []
    for fid in fids:
        fileDict = dxpy.describe(FILES[fid]) # FILES contain dxLinks
        if fileDict['project'] == projectId:
            # cannot copy into the same project!!!
            # so just leave in place and pretend that we did!
            #proj = dxpy.DXProject(projectId)
            #proj.move(folder,[fid])
            newFids.append( fid )
            continue

        # Check to see if file already exists.
        alreadyThere = find_file(folder+'/'+fileDict['name'],projectId)
        if alreadyThere is None or overwrite:
            # remove what is alreadyThere?
            #if alreadyThere is not None:
            #    proj = dxpy.DXProject(projectId)
            #    proj.remove_objects([alreadyThere])
            dxFile = dxpy.get_handler(FILES[fid])
            newLink = dxpy.dxlink(dxFile.clone(projectId, folder))
        else:
            newLink = FILES[alreadyThere]
        if newLink is None:
            print "ERROR: Failed in copy of '" + fileDict['project'] + ":" + fileDict['name'] + \
                    "' to '" + projectId + ":" + folder + "'."
            sys.exit(1)
        newDict = dxpy.describe(newLink)
        FILES[newDict['id']] = newLink
        newFids.append( newDict['id'] )

    return newFids
Example #8
def job_describe(job_id, key=None, verbose=False):
    '''Returns dx job's description property matching 'key'.'''

    dxjob = None
    try:
        dxjob = dxpy.get_handler(job_id)
    except:
        sys.stderr.write('ERROR: unable to find job: "' + job_id + '": \n')
        sys.exit(0)  # Do not error on tool run in dx script

    description = dxjob.describe()

    if not description:
        sys.stderr.write('ERROR: unable to find description of job "' +
                         job_id + '": \n')
        sys.exit(0)  # Do not error on tool run in dx script

    if key is None:
        if verbose:
            sys.stderr.write(json.dumps(description) + '\n')
        return description

    if key not in description:
        sys.stderr.write('ERROR: unable to find "' + key +
                         '" in description of job "' + job_id + '": \n')
        sys.exit(0)  # Do not error on tool run in dx script
    value = description[key]

    if verbose:
        sys.stderr.write(value + '\n')

    return value
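A hedged usage sketch; the job ID is a placeholder, and 'state' is one of the standard job describe fields:

# Hypothetical usage: fetch a single describe field
state = job_describe("job-xxxx", key="state")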
Example #10
def job_describe(job_id, key=None, verbose=False):
    '''Returns dx job's description property matching 'key'.'''

    dxjob = None
    try:
        dxjob = dxpy.get_handler(job_id)
    except:
        sys.stderr.write('WARNING: unable to find job: "' + job_id + '": \n')
        sys.exit(0)  # Do not error on tool run in dx script

    description = dxjob.describe()

    if not description:
        sys.stderr.write('WARNING: unable to find description of job "' + job_id + '": \n')
        sys.exit(0)  # Do not error on tool run in dx script

    if key is None:
        if verbose:
            sys.stderr.write(json.dumps(description) + '\n')
        return description

    if key not in description:
        sys.stderr.write('WARNING: unable to find "' + key + '" in description of job "' + job_id + '": \n')
        sys.exit(0)  # Do not error on tool run in dx script
    value = description[key]

    if verbose:
        sys.stderr.write(value + '\n')

    return value
Example #11
    def unpack_tar(self, tar_file_dxlink):
        '''
        DEV: Eventually integrate dx-toolkit into trajectoread repo so I can 
             transition to using 'dx-download-all-inputs' to handle unpacking
             all input files.
             Pipeline used to store lane file dxids as project properties 
             and then pass to "dx download"
        Description: Download and untar metadata and lane data files 
                     (/Data/Intensities/BaseCalls)
        '''

        if dxpy.is_dxlink(tar_file_dxlink):
            file_handler = dxpy.get_handler(tar_file_dxlink)
            filename = file_handler.name
        else:
            print 'Error: Cannot unpack %s; not a valid DXLink object' % tar_file_dxlink
            sys.exit()

        # get_dxlink_ids returns the tuple (file_dxid, project_dxid)
        file_dxid, project_id = dxpy.get_dxlink_ids(tar_file_dxlink)

        # Download file from DNAnexus objectstore to virtual machine
        dxpy.download_dxfile(dxid=file_dxid,
                             filename=filename,
                             project=project_id)

        # Untar file
        ## DEV: Check if this is even in use anymore; also should have some method for
        ## checking what type of compression was used.
        ## But I don't think this is in use
        command = 'tar -xf %s --owner root --group root --no-same-owner' % filename
        self.createSubprocess(cmd=command, pipeStdout=False)
Example #12
    def test_dxfs_operations(self):
        # FIXME: Make the mount live or add command to refresh it with remote changes
        #subprocess.check_call(['dx', 'mkdir', 'foo'])
        #subprocess.check_call(['dx', 'mkdir', 'bar'])
        #subprocess.check_call(['dx', 'mkdir', '-p', '/bar/baz'])

        self.assertEqual(set(os.listdir(self.mountpoint)),
                         set(['foo', 'bar',
                              os.path.basename(__file__)]))

        # Reading
        self.assertEqual(
            open(__file__).read(),
            open(os.path.join(self.mountpoint, __file__)).read())

        # Moving
        shutil.move(os.path.join(self.mountpoint, __file__),
                    os.path.join(self.mountpoint, __file__ + "2"))
        self.assertEqual(set(os.listdir(self.mountpoint)),
                         set(['foo', 'bar',
                              os.path.basename(__file__ + "2")]))
        shutil.move(os.path.join(self.mountpoint, __file__ + "2"),
                    os.path.join(self.mountpoint, "foo"))
        self.assertEqual(set(os.listdir(os.path.join(self.mountpoint, 'foo'))),
                         set([os.path.basename(__file__ + "2")]))
        folder_listing = self.project.list_folder('/foo')
        self.assertEqual(len(folder_listing['folders']), 0)
        self.assertEqual(len(folder_listing['objects']), 1)
        self.assertEqual(
            dxpy.get_handler(folder_listing['objects'][0]['id']).name,
            os.path.basename(__file__ + "2"))
        self.assertEqual(
            open(__file__).read(),
            open(os.path.join(self.mountpoint, 'foo', __file__ + "2")).read())
Example #13
def main():

    app = my_app.create_app()
    dxhandler = dxpy.get_handler(dxpy.JOB_ID)
    dxhandler.set_properties({"httpsAppState": "running"})
    app.run_server(host='0.0.0.0', port=443)

    return 1
Example #15
 def verify_files_in_dir(path, expected_filenames, dxproj):
     """ verify that a particular set of files resides in a directory """
     dir_listing = dxproj.list_folder(folder=path, only="objects")
     for elem in dir_listing["objects"]:
         handler = dxpy.get_handler(elem["id"])
         if not isinstance(handler, dxpy.DXFile):
             continue
         if handler.name not in expected_filenames:
             raise Exception("Error: file {} should reside in directory {}".format(handler.name, path))
Example #16
    def test_deepdirs(self):
        """ Tests the use of subdirectories in the output directory """

        def check_output_key(job_output, out_param_name, num_files, dxproj):
            """ check that an output key appears, and has the correct number of files """
            print("checking output for param={}".format(out_param_name))
            if out_param_name not in job_output:
                raise "Error: key {} does not appear in the job output".format(out_param_name)
            dxlink_id_list = job_output[out_param_name]
            if not len(dxlink_id_list) == num_files:
                raise Exception(
                    "Error: key {} should have {} files, but has {}".format(
                        out_param_name, num_files, len(dxlink_id_list)
                    )
                )

        def verify_files_in_dir(path, expected_filenames, dxproj):
            """ verify that a particular set of files resides in a directory """
            dir_listing = dxproj.list_folder(folder=path, only="objects")
            for elem in dir_listing["objects"]:
                handler = dxpy.get_handler(elem["id"])
                if not isinstance(handler, dxpy.DXFile):
                    continue
                if handler.name not in expected_filenames:
                    raise Exception("Error: file {} should reside in directory {}".format(handler.name, path))

        with temporary_project("TestDXBashHelpers.test_app1 temporary project") as dxproj:
            env = update_environ(DX_PROJECT_CONTEXT_ID=dxproj.get_id())

            # Build the applet, patching in the bash helpers from the
            # local checkout
            applet_id = build_app_with_bash_helpers(os.path.join(TEST_APPS, "deepdirs"), dxproj.get_id())

            # Run the applet
            cmd_args = ["dx", "run", "--yes", "--brief", applet_id]
            job_id = run(cmd_args, env=env).strip()

            dxpy.DXJob(job_id).wait_on_done()

            print("Test completed successfully, checking outputs\n")

            # Assertions about the output. There should be three result keys
            job_handler = dxpy.get_handler(job_id)
            job_output = job_handler.output

            check_output_key(job_output, "genes", 8, dxproj)
            check_output_key(job_output, "phenotypes", 7, dxproj)
            check_output_key(job_output, "report", 1, dxproj)
            check_output_key(job_output, "helix", 1, dxproj)

            verify_files_in_dir("/clue", ["X_1.txt", "X_2.txt", "X_3.txt"], dxproj)
            verify_files_in_dir("/hint", ["V_1.txt", "V_2.txt", "V_3.txt"], dxproj)
            verify_files_in_dir("/clue2", ["Y_1.txt", "Y_2.txt", "Y_3.txt"], dxproj)
            verify_files_in_dir("/hint2", ["Z_1.txt", "Z_2.txt", "Z_3.txt"], dxproj)
            verify_files_in_dir("/foo/bar", ["luke.txt"], dxproj)
            verify_files_in_dir("/", ["A.txt", "B.txt", "C.txt", "num_chrom.txt"], dxproj)
Example #17
 def verify_files_in_dir(path, expected_filenames, dxproj):
     ''' verify that a particular set of files resides in a directory '''
     dir_listing = dxproj.list_folder(folder=path, only="objects")
     for elem in dir_listing["objects"]:
         handler = dxpy.get_handler(elem["id"])
         if not isinstance(handler, dxpy.DXFile):
             continue
         if handler.name not in expected_filenames:
             raise Exception("Error: file {} should reside in directory {}".
                             format(handler.name, path))
Example #18
def upload_resources(src_dir, project=None, folder='/'):
    """
    :returns: A list (possibly empty) of references to the generated archive(s)
    :rtype: list

    If it exists, archives and uploads the contents of the
    ``resources/`` subdirectory of *src_dir* to a new remote file
    object, and returns a list describing a single bundled dependency in
    the form expected by the ``bundledDepends`` field of a run
    specification. Returns an empty list, if no archive was created.
    """
    applet_spec = _get_applet_spec(src_dir)

    if project is None:
        dest_project = applet_spec['project']
    else:
        dest_project = project
        applet_spec['project'] = project

    resources_dir = os.path.join(src_dir, "resources")
    if os.path.exists(resources_dir) and len(os.listdir(resources_dir)) > 0:
        logger.debug("Uploading in " + src_dir)

        with tempfile.NamedTemporaryFile(suffix=".tar.gz") as tar_fh:
            subprocess.check_call(
                ['tar', '-C', resources_dir, '-czf', tar_fh.name, '.'])
            if 'folder' in applet_spec:
                try:
                    dxpy.get_handler(dest_project).new_folder(
                        applet_spec['folder'], parents=True)
                except dxpy.exceptions.DXAPIError:
                    pass  # TODO: make this better
            target_folder = applet_spec[
                'folder'] if 'folder' in applet_spec else folder
            dx_resource_archive = dxpy.upload_local_file(tar_fh.name,
                                                         wait_on_close=True,
                                                         project=dest_project,
                                                         folder=target_folder,
                                                         hidden=True)
            archive_link = dxpy.dxlink(dx_resource_archive.get_id())
            return [{'name': 'resources.tar.gz', 'id': archive_link}]
    else:
        return []
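For orientation, the returned list is shaped for the ``bundledDepends`` field of a run specification; a sketch of the value a caller might receive, with a placeholder file ID:

# Sketch of the return value when resources/ exists (file ID is a placeholder):
bundled_depends = [{"name": "resources.tar.gz",
                    "id": {"$dnanexus_link": "file-xxxx"}}]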
Example #19
def main():
    job = dxpy.DXJob(dxid=sys.argv[1])
    ccds_seqs = load_ccds_seqs(sys.argv[2])
    alignments = dxpy.get_handler(job.describe()['output']['alignments'])
    alignments_desc = alignments.describe()

    # verify basic table properties
    assert 'CrossSpeciesAlignments' in alignments_desc['types']
    assert alignments_desc['length'] == 318

    # verify table schema
    cols = alignments.get_columns()
    assert len(cols) == len(expected_colnames)
    for i in xrange(len(expected_colnames)):
        assert cols[i]['type'] == 'string'
        assert cols[i]['name'] == expected_colnames[i]

    # verify table contents
    names = []

    bps = 0
    alns = [0 for i in xrange(3, len(expected_colnames))]
    ids = [0 for i in xrange(3, len(expected_colnames))]

    for row in alignments.iterate_rows():
        ccds_id = row[1]
        names.append(ccds_id)
        assert len(row) == 1 + len(expected_colnames)
        # check length of each consensus sequence
        for col in xrange(4, len(row)):
            assert len(row[col]) == len(row[3])
        # check that human sequence matches CCDS
        hs_seq = str(row[3]).translate(None, '-').upper()
        ccds_seq = ccds_seqs[ccds_id].upper()
        if len(ccds_seq) == 3 + len(hs_seq):  # stop codon
            ccds_seq = ccds_seq[:len(ccds_seq) - 3]
        if hs_seq != ccds_seq:
            print 'Sequence mismatch for {}'.format(ccds_id)
            print 'Sequence found in alignments table: {}'.format(hs_seq)
            print 'Expected sequence from CCDS: {}'.format(ccds_seq)
            assert False
        # collect alignment statistics
        bps = bps + len(hs_seq)
        for i in xrange(4, 1 + len(expected_colnames)):
            a, s = similarity(row[3], row[i])
            alns[i - 4] = alns[i - 4] + a
            ids[i - 4] = ids[i - 4] + s

    assert len(set(names)) == 318

    print 'Alignment quality statistics (make sure these look reasonable):'
    print 'Informant\tAligned %\tIdentity %'
    for i in xrange(len(expected_colnames) - 3):
        print '{}\t{}\t{}'.format(expected_colnames[i + 3].rjust(12),
                                  alns[i] * 100 / bps, ids[i] * 100 / alns[i])
Example #20
    def test_get_handler(self):
        dxpy.set_workspace_id(self.second_proj_id)

        dxrecord = dxpy.new_dxrecord(project=self.proj_id)
        # Simple DXLink
        dxlink = {'$dnanexus_link': dxrecord.get_id()}
        handler = dxpy.get_handler(dxlink)
        self.assertEqual(handler.get_id(), dxrecord.get_id())
        # Default project is not going to be the correct one
        self.assertNotEqual(handler.get_proj_id(), self.proj_id)

        # Extended DXLink
        dxlink = {'$dnanexus_link': {'id': dxrecord.get_id(),
                                     'project': self.proj_id}}
        handler = dxpy.get_handler(dxlink)
        self.assertEqual(handler.get_id(), dxrecord.get_id())
        self.assertEqual(handler.get_proj_id(), self.proj_id)

        # Handle project IDs
        dxproject = dxpy.get_handler(self.proj_id)
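Taken together, the test above exercises the main input forms that dxpy.get_handler accepts; a compact sketch with placeholder IDs:

handler = dxpy.get_handler("record-xxxx")                      # bare data object ID
handler = dxpy.get_handler({"$dnanexus_link": "record-xxxx"})  # simple DXLink
handler = dxpy.get_handler({"$dnanexus_link": {"id": "record-xxxx",
                                               "project": "project-yyyy"}})  # extended DXLink
project = dxpy.get_handler("project-yyyy")                     # project IDs yield a DXProject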
Example #21
File: verify.py Project: mlin/dxMAF
def main():
  job=dxpy.DXJob(dxid=sys.argv[1])
  ccds_seqs=load_ccds_seqs(sys.argv[2])
  alignments = dxpy.get_handler(job.describe()['output']['alignments'])
  alignments_desc = alignments.describe()

  # verify basic table properties
  assert 'CrossSpeciesAlignments' in alignments_desc['types']
  assert alignments_desc['length'] == 318

  # verify table schema
  cols = alignments.get_columns()
  assert len(cols) == len(expected_colnames)
  for i in xrange(len(expected_colnames)):
    assert cols[i]['type'] == 'string'
    assert cols[i]['name'] == expected_colnames[i]

  # verify table contents
  names = []

  bps = 0
  alns = [0 for i in xrange(3, len(expected_colnames))]
  ids = [0 for i in xrange(3, len(expected_colnames))]

  for row in alignments.iterate_rows():
    ccds_id = row[1]
    names.append(ccds_id)
    assert len(row) == 1+len(expected_colnames)
    # check length of each consensus sequence
    for col in xrange(4,len(row)):
      assert len(row[col]) == len(row[3])
    # check that human sequence matches CCDS
    hs_seq = str(row[3]).translate(None, '-').upper()
    ccds_seq = ccds_seqs[ccds_id].upper()
    if len(ccds_seq) == 3+len(hs_seq): # stop codon
      ccds_seq = ccds_seq[:len(ccds_seq)-3]
    if hs_seq != ccds_seq:
      print 'Sequence mismatch for {}'.format(ccds_id)
      print 'Sequence found in alignments table: {}'.format(hs_seq)
      print 'Expected sequence from CCDS: {}'.format(ccds_seq)
      assert False
    # collect alignment statistics
    bps = bps + len(hs_seq)
    for i in xrange(4, 1+len(expected_colnames)):
      a, s = similarity(row[3],row[i])
      alns[i-4] = alns[i-4] + a
      ids[i-4] = ids[i-4] + s

  assert len(set(names)) == 318

  print 'Alignment quality statistics (make sure these look reasonable):'
  print 'Informant\tAligned %\tIdentity %'
  for i in xrange(len(expected_colnames)-3):
    print '{}\t{}\t{}'.format(expected_colnames[i+3].rjust(12), alns[i]*100/bps, ids[i]*100/alns[i])
Example #22
def _list_subfolders(project, path, cached_folder_lists, recurse=True):
    if project not in cached_folder_lists:
        cached_folder_lists[project] = dxpy.get_handler(project).describe(
            input_params={'folders': True}
        )['folders']
    # TODO: support shell-style path globbing (i.e. /a*/c matches /ab/c but not /a/b/c)
    # return pathmatch.filter(cached_folder_lists[project], os.path.join(path, '*'))
    if recurse:
        return (f for f in cached_folder_lists[project] if f.startswith(path))
    else:
        return (f for f in cached_folder_lists[project] if f.startswith(path) and '/' not in f[len(path)+1:])
Example #23
def _list_subfolders(project, path, cached_folder_lists, recurse=True):
    if project not in cached_folder_lists:
        cached_folder_lists[project] = dxpy.get_handler(project).describe(
            input_params={'folders': True})['folders']
    # TODO: support shell-style path globbing (i.e. /a*/c matches /ab/c but not /a/b/c)
    # return pathmatch.filter(cached_folder_lists[project], os.path.join(path, '*'))
    if recurse:
        return (f for f in cached_folder_lists[project] if f.startswith(path))
    else:
        return (f for f in cached_folder_lists[project]
                if f.startswith(path) and '/' not in f[len(path) + 1:])
Example #24
    def test_deepdirs(self):
        ''' Tests the use of subdirectories in the output directory '''
        def check_output_key(job_output, out_param_name, num_files, dxproj):
            ''' check that an output key appears, and has the correct number of files '''
            print('checking output for param={}'.format(out_param_name))
            if out_param_name not in job_output:
                raise "Error: key {} does not appear in the job output".format(out_param_name)
            dxlink_id_list = job_output[out_param_name]
            if not len(dxlink_id_list) == num_files:
                raise Exception("Error: key {} should have {} files, but has {}".
                                format(out_param_name, num_files, len(dxlink_id_list)))

        def verify_files_in_dir(path, expected_filenames, dxproj):
            ''' verify that a particular set of files resides in a directory '''
            dir_listing = dxproj.list_folder(folder=path, only="objects")
            for elem in dir_listing["objects"]:
                handler = dxpy.get_handler(elem["id"])
                if not isinstance(handler, dxpy.DXFile):
                    continue
                if handler.name not in expected_filenames:
                    raise Exception("Error: file {} should reside in directory {}".
                                    format(handler.name, path))

        with temporary_project('TestDXBashHelpers.test_app1 temporary project') as dxproj:
            env = update_environ(DX_PROJECT_CONTEXT_ID=dxproj.get_id())

            # Build the applet, patching in the bash helpers from the
            # local checkout
            applet_id = build_app_with_bash_helpers(os.path.join(TEST_APPS, 'deepdirs'), dxproj.get_id())

            # Run the applet
            cmd_args = ['dx', 'run', '--yes', '--brief', applet_id]
            job_id = run(cmd_args, env=env).strip()

            dxpy.DXJob(job_id).wait_on_done()

            print("Test completed successfully, checking outputs\n")

            # Assertions about the output. There should be three result keys
            job_handler = dxpy.get_handler(job_id)
            job_output = job_handler.output

            check_output_key(job_output, "genes", 8, dxproj)
            check_output_key(job_output, "phenotypes", 7, dxproj)
            check_output_key(job_output, "report", 1, dxproj)
            check_output_key(job_output, "helix", 1, dxproj)

            verify_files_in_dir("/clue", ["X_1.txt", "X_2.txt", "X_3.txt"], dxproj)
            verify_files_in_dir("/hint", ["V_1.txt", "V_2.txt", "V_3.txt"], dxproj)
            verify_files_in_dir("/clue2", ["Y_1.txt", "Y_2.txt", "Y_3.txt"], dxproj)
            verify_files_in_dir("/hint2", ["Z_1.txt", "Z_2.txt", "Z_3.txt"], dxproj)
            verify_files_in_dir("/foo/bar", ["luke.txt"], dxproj)
            verify_files_in_dir("/", ["A.txt", "B.txt", "C.txt", "num_chrom.txt"], dxproj)
Example #25
def cp(args):
    dest_proj, dest_path, _none = try_call(resolve_path,
                                           args.destination, 'folder')
    if dest_path is None:
        parser.exit(1, 'Cannot copy to a hash ID\n')
    dx_dest = dxpy.get_handler(dest_proj)
    try:
        # check if the destination exists
        dx_dest.list_folder(folder=dest_path, only='folders')
    except:
        cp_to_noexistent_destination(args, dest_path, dx_dest, dest_proj)
        return

    # The destination exists, we need to copy all of the sources to it.
    if len(args.sources) == 0:
        parser.exit(1, 'No sources provided to copy to another project\n')
    src_objects = []
    src_folders = []
    for source in args.sources:
        src_proj, src_folderpath, src_results = try_call(resolve_existing_path,
                                                         source,
                                                         allow_mult=True, all_mult=args.all)
        if src_proj == dest_proj:
            if is_hashid(source):
                # This is the only case in which the source project is
                # purely assumed, so give a better error message.
                parser.exit(1, fill('Error: You must specify a source project for ' + source) + '\n')
            else:
                parser.exit(1, fill('Error: A source path and the destination path resolved ' +
                                    'to the same project or container. Please specify ' +
                                    'different source and destination containers, e.g.') +
                            '\n  dx cp source-project:source-id-or-path dest-project:dest-path' + '\n')

        if src_proj is None:
            parser.exit(1, fill('Error: A source project must be specified or a current ' +
                                'project set in order to clone objects between projects') + '\n')

        if src_results is None:
            src_folders.append(src_folderpath)
        else:
            src_objects += [result['id'] for result in src_results]
    try:
        exists = dxpy.DXHTTPRequest('/' + src_proj + '/clone',
                                    {"objects": src_objects,
                                     "folders": src_folders,
                                     "project": dest_proj,
                                     "destination": dest_path})['exists']
        if len(exists) > 0:
            print(fill('The following objects already existed in the destination container ' +
                       'and were left alone:') + '\n ' + '\n '.join(exists))
    except:
        err_exit()
def resolve_project(identifier, privs="r"):
    project = dxpy.find_one_project(name=identifier, level="VIEW", name_mode="exact", return_handler=True, zero_ok=True)
    if project is None:
        try:
            project = dxpy.get_handler(identifier)
        except:
            logging.error("Could not find a unique project with name or id %s" % (identifier))
            raise ValueError(identifier)
    logging.debug("Project %s access level is %s" % (project.name, project.describe()["level"]))
    if privs == "w" and project.describe()["level"] == "VIEW":
        logging.error("Output project %s is read-only" % (identifier))
        raise ValueError(identifier)
    return project
Example #27
def resolve_project(identifier, privs='r'):
    project = dxpy.find_one_project(name=identifier, level='VIEW', name_mode='exact', return_handler=True, zero_ok=True)
    if project is None:
        try:
            project = dxpy.get_handler(identifier)
        except:
            logging.error('Could not find a unique project with name or id %s' %(identifier))
            raise ValueError(identifier)
    logging.debug('Project %s access level is %s' %(project.name, project.describe()['level']))
    if privs == 'w' and project.describe()['level'] == 'VIEW':
        logging.error('Output project %s is read-only' %(identifier))
        raise ValueError(identifier)
    return project
Example #28
def resolve_project(identifier, privs='r'):
    project = dxpy.find_one_project(name=identifier, level='VIEW', name_mode='exact', return_handler=True, zero_ok=True)
    if project is None:
        try:
            project = dxpy.get_handler(identifier)
        except:
            logging.error('Could not find a unique project with name or id %s' %(identifier))
            raise ValueError(identifier)
    logging.debug('Project %s access level is %s' %(project.name, project.describe()['level']))
    if privs == 'w' and project.describe()['level'] == 'VIEW':
        logging.error('Output project %s is read-only' %(identifier))
        raise ValueError(identifier)
    return project
Example #29
def check_input(args):
    dxpy.set_security_context({
                "auth_token_type": "Bearer",
                "auth_token": args.api_token})

    # Check API token and project context
    try:
        dxpy.get_handler(args.project).describe()
    except dxpy.exceptions.DXAPIError as e:
        if e.name == "InvalidAuthentication":
            raise_error("API token (%s) is not valid. %s"
                    % (args.api_token, e))
        if e.name == "PermissionDenied":
            raise_error("Project (%s) is not valid. %s"
                    % (args.project, e))
    except dxpy.exceptions.DXError as e:
        raise_error("Error getting project handler for project (%s). %s" %
                (args.project, e))

    # Check that chained downstream applet is valid
    if args.applet:
        try:
            dxpy.get_handler(args.applet).describe()
        except dxpy.exceptions.DXAPIError as e:
            raise_error("Unable to resolve applet %s. %s" %(args.applet, e))
        except dxpy.exceptions.DXError as e:
            raise_error("Error getting handler for applet (%s). %s" %(args.applet, e))

    # Check that chained downstream workflow is valid
    if args.workflow:
        try:
            dxpy.get_handler(args.workflow).describe()
        except dxpy.exceptions.DXAPIError as e:
            raise_error("Unable to resolve workflow %s. %s" %(args.workflow, e))
        except dxpy.exceptions.DXError as e:
            raise_error("Error getting handler for workflow (%s). %s" %(args.workflow, e))

    # Check that executable to launch locally is executable
    if args.script:
        if not (os.path.isfile(args.script) and os.access(args.script, os.X_OK)):
            raise_error("Executable/script passed by -s: (%s) is not executable" %(args.script))

    if not args.dxpy_upload:
        print_stderr("Checking if ua is in $PATH")
        try:
            sub.check_call(['ua', '--version'],
                    stdout=open(os.devnull, 'w'), close_fds=True)
        except sub.CalledProcessError:
            raise_error("Upload agent executable 'ua' was not found in the $PATH")

    try:
        # We assume that dx_sync_directory is located in the same folder as this script
        # This is resolved by absolute path of invocation
        sub.check_call(['python', '{curr_dir}/dx_sync_directory.py'.format(curr_dir=sys.path[0]), '-h'],
                stdout=open(os.devnull, 'w'), close_fds=True)
    except sub.CalledProcessError:
        raise_error("dx_sync_directory.py not found. Please run incremental " +
                "upload from the directory containing incremental_upload.py "+
                "and dx_sync_directory.py")
Example #30
def main(**kwargs):
    if len(kwargs) == 0:
        args = parser.parse_args(sys.argv[1:])
    else:
        args = parser.parse_args(kwargs)

    # Attempt to resolve name
    try:
        project, folderpath, entity_result = resolve_existing_path(args.path, expected='entity')
    except ResolutionError as details:
        parser.exit(1, fill(unicode(details)) + '\n')

    if entity_result is None:
        parser.exit(1, fill('Could not resolve ' + args.path + ' to a data object') + '\n')

    filename = args.output
    if filename is None:
        filename = entity_result['describe']['name'].replace('/', '%2F')

    dxtable = dxpy.get_handler(entity_result['id'])

    delimiter = ',' if args.csv else '\t'
    if args.output == '-':
        writer = csv.writer(sys.stdout, delimiter=delimiter)
    else:
        if args.output is None and not args.no_ext:
            filename += '.csv' if args.csv else '.tsv'
        if not args.overwrite and os.path.exists(filename):
            parser.exit(1, fill('Error: path \"' + filename + '\" already exists but -f/--overwrite was not set') + '\n')
        writer = csv.writer(open(filename, 'wb'),
                            delimiter=delimiter)
    if not args.no_header:
        writer.writerow((['__id__:int'] if args.rowid else []) + [(col['name'] + ':' + col['type']) for col in dxtable.describe()['columns']])

    # Query stuff
    if args.gri is not None:
        try:
            lo = int(args.gri[1])
            hi = int(args.gri[2])
        except:
            parser.exit(1, fill('Error: the LO and HI arguments to --gri must be integers') + '\n')
        gri_query = dxpy.DXGTable.genomic_range_query(args.gri[0],
                                                      lo,
                                                      hi,
                                                      args.gri_mode,
                                                      args.gri_name)
        iterator = dxtable.iterate_query_rows(query=gri_query, limit=args.limit)
    else:
        iterator = dxtable.iterate_rows(start=args.starting, end=(None if args.limit is None else args.starting + args.limit))
    for row in iterator:
        writer.writerow([unicode(item).encode('utf-8') for item in row[0 if args.rowid else 1:]])
Example #32
def new_workflow(args):
    try_call(process_dataobject_args, args)
    try_call(process_single_dataobject_output_args, args)
    init_from = None
    if args.init is not None:
        if is_analysis_id(args.init):
            init_from = args.init
        else:
            init_project, _init_folder, init_result = try_call(
                resolve_existing_path, args.init, expected='entity')
            init_from = dxpy.get_handler(init_result['id'],
                                         project=init_project)
    if args.output is None:
        project = dxpy.WORKSPACE_ID
        folder = dxpy.config.get("DX_CLI_WD", "/")
        name = None
    else:
        project, folder, name = try_call(dxpy.utils.resolver.resolve_path,
                                         args.output)
    if args.output_folder is not None:
        try:
            # Try to resolve to a path in the project
            _ignore, args.output_folder, _ignore = resolve_path(
                args.output_folder, expected='folder')
        except:
            # But if not, just use the value directly
            pass

    try:
        dxworkflow = dxpy.new_dxworkflow(title=args.title,
                                         summary=args.summary,
                                         description=args.description,
                                         output_folder=args.output_folder,
                                         project=project,
                                         name=name,
                                         tags=args.tags,
                                         types=args.types,
                                         hidden=args.hidden,
                                         properties=args.properties,
                                         details=args.details,
                                         folder=folder,
                                         parents=args.parents,
                                         init_from=init_from)
        if args.brief:
            print(dxworkflow.get_id())
        else:
            dxpy.utils.describe.print_desc(
                dxworkflow.describe(incl_properties=True, incl_details=True),
                args.verbose)
    except:
        err_exit()
Example #33
def cp(args):
    dest_proj, dest_path, _none = try_call(resolve_path, args.destination, expected='folder')
    if dest_path is None:
        raise DXCLIError('Cannot copy to a hash ID')
    dx_dest = dxpy.get_handler(dest_proj)
    try:
        # check if the destination exists
        dx_dest.list_folder(folder=dest_path, only='folders')
    except:
        cp_to_noexistent_destination(args, dest_path, dx_dest, dest_proj)
        return

    # The destination exists, we need to copy all of the sources to it.
    if len(args.sources) == 0:
        raise DXCLIError('No sources provided to copy to another project')
    src_objects = []
    src_folders = []
    for source in args.sources:
        src_proj, src_folderpath, src_results = try_call(resolve_existing_path,
                                                         source,
                                                         allow_mult=True, all_mult=args.all)
        if src_proj == dest_proj:
            if is_hashid(source):
                # This is the only case in which the source project is
                # purely assumed, so give a better error message.
                raise DXCLIError(fill('Error: You must specify a source project for ' + source))
            else:
                raise DXCLIError(fill('Error: A source path and the destination path resolved ' +
                                    'to the same project or container. Please specify ' +
                                    'different source and destination containers, e.g.') +
                                 '\n  dx cp source-project:source-id-or-path dest-project:dest-path')

        if src_proj is None:
            raise DXCLIError(fill('Error: A source project must be specified or a current ' +
                                  'project set in order to clone objects between projects'))

        if src_results is None:
            src_folders.append(src_folderpath)
        else:
            src_objects += [result['id'] for result in src_results]
    try:
        exists = dxpy.DXHTTPRequest('/' + src_proj + '/clone',
                                    {"objects": src_objects,
                                     "folders": src_folders,
                                     "project": dest_proj,
                                     "destination": dest_path})['exists']
        if len(exists) > 0:
            print(fill('The following objects already existed in the destination container ' +
                       'and were left alone:') + '\n ' + '\n '.join(exists))
    except:
        err_exit()
Example #34
 def add_file(iname, subdir, value):
     if not dxpy.is_dxlink(value):
         return
     handler = dxpy.get_handler(value)
     if not isinstance(handler, dxpy.DXFile):
         return
     filename = make_unix_filename(handler.name)
     trg_dir = iname
     if subdir is not None:
         trg_dir = os.path.join(trg_dir, subdir)
     files[iname].append({'trg_fname': os.path.join(trg_dir, filename),
                          'handler': handler,
                          'src_file_id': handler.id})
     dirs.append(trg_dir)
Example #35
def upload_resources(src_dir, project=None, folder='/'):
    """
    :returns: A list (possibly empty) of references to the generated archive(s)
    :rtype: list

    If it exists, archives and uploads the contents of the
    ``resources/`` subdirectory of *src_dir* to a new remote file
    object, and returns a list describing a single bundled dependency in
    the form expected by the ``bundledDepends`` field of a run
    specification. Returns an empty list, if no archive was created.
    """
    applet_spec = _get_applet_spec(src_dir)

    if project is None:
        dest_project = applet_spec['project']
    else:
        dest_project = project
        applet_spec['project'] = project

    resources_dir = os.path.join(src_dir, "resources")
    if os.path.exists(resources_dir) and len(os.listdir(resources_dir)) > 0:
        logger.debug("Uploading in " + src_dir)

        with tempfile.NamedTemporaryFile(suffix=".tar.gz") as tar_fh:
            subprocess.check_call(['tar', '-C', resources_dir, '-czf', tar_fh.name, '.'])
            if 'folder' in applet_spec:
                try:
                    dxpy.get_handler(dest_project).new_folder(applet_spec['folder'], parents=True)
                except dxpy.exceptions.DXAPIError:
                    pass # TODO: make this better
            target_folder = applet_spec['folder'] if 'folder' in applet_spec else folder
            dx_resource_archive = dxpy.upload_local_file(tar_fh.name, wait_on_close=True,
                                                         project=dest_project, folder=target_folder, hidden=True)
            archive_link = dxpy.dxlink(dx_resource_archive.get_id())
            return [{'name': 'resources.tar.gz', 'id': archive_link}]
    else:
        return []
Example #36
def analysis_describe_with_retry(analysis_id_or_handler):
    if isinstance(analysis_id_or_handler, basestring):
        handler = dxpy.get_handler(analysis_id_or_handler)
    else:
        handler = analysis_id_or_handler
    # All the describe fields may not be available immediately. Wait
    # until they have been populated.
    for i in range(200):  # Don't wait an unbounded amount of time
        desc = handler.describe()
        # Sufficient to look for any field, other than 'id', that is
        # present in all job describe hashes
        if all('executable' in stage['execution'] for stage in desc['stages']):
            return desc
        time.sleep(0.5)
    raise IOError('Timed out while waiting for ' + handler.get_id() + ' to have all jobs populated')
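A hedged usage sketch; the analysis ID is a placeholder:

# Hypothetical usage: block until every stage has a populated execution
desc = analysis_describe_with_retry("analysis-xxxx")
for stage in desc["stages"]:
    print(stage["execution"]["executable"])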
Example #38
def get_dxfile(filePath, project=None):
    '''Returns dxfile object.'''
    dxfile = None
    #if filePath.find("$dnanexus_link") != -1:
    #    filePath = filePath.split(' ')[1]
    #    filePath = filePath.replace("'","").replace('"','').replace("}","").replace("{","")
    try:
        dxlink = json.loads(filePath.strip("'"))
    except:
        dxlink = None

    if project is not None:

        try:
            if dxlink is not None:
                dxfile = dxpy.get_handler(dxlink, project=project)
            else:
                dxfile = dxpy.get_handler(filePath, project=project)
        except:
            try:
                dxlink = dxpy.dxlink(filePath, project=project)
                dxfile = dxpy.get_handler(dxlink)
            except:
                try:
                    proj_id = env_get_current_project_id()
                    dxfile = dxpy.DXFile(filePath, project=proj_id)
                except:
                    sys.stderr.write('ERROR: unable to find file "' +
                                     filePath + '": \n')
                    sys.exit(0)  # Do not error on tool run in dx script

    else:

        try:
            if dxlink is not None:
                dxfile = dxpy.get_handler(dxlink)
            else:
                dxfile = dxpy.get_handler(filePath)
        except:
            try:
                dxlink = dxpy.dxlink(filePath)
                dxfile = dxpy.get_handler(dxlink)
            except:
                try:
                    proj_id = env_get_current_project_id()
                    dxfile = dxpy.DXFile(filePath, project=proj_id)
                except:
                    sys.stderr.write('ERROR: unable to find file "' +
                                     filePath + '": \n')
                    sys.exit(0)  # Do not error on tool run in dx script

    if dxfile is None:
        sys.stderr.write('ERROR: unable to find file "' + filePath + '": \n')
        sys.exit(0)  # Do not error on tool run in dx script

    return dxfile
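A hedged usage sketch; the file and project IDs are placeholders:

# Hypothetical usage: resolve either a bare file ID or a serialized dxlink string
dxfile = get_dxfile("file-xxxx", project="project-yyyy")
print(dxfile.describe()["name"])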
Example #39
 def add_file(iname, subdir, value):
     if not dxpy.is_dxlink(value):
         return
     handler = dxpy.get_handler(value)
     if not isinstance(handler, dxpy.DXFile):
         return
     filename = make_unix_filename(handler.name)
     trg_dir = iname
     if subdir is not None:
         trg_dir = os.path.join(trg_dir, subdir)
     files[iname].append({
         'trg_fname': os.path.join(trg_dir, filename),
         'handler': handler,
         'src_file_id': handler.id
     })
     dirs.append(trg_dir)
Example #40
def get_dxfile(filePath, project=None):
    '''Returns dxfile object.'''
    dxfile = None
    #if filePath.find("$dnanexus_link") != -1:
    #    filePath = filePath.split(' ')[1]
    #    filePath = filePath.replace("'","").replace('"','').replace("}","").replace("{","")
    try:
        dxlink = json.loads(filePath.strip("'"))
    except:
        dxlink = None

    if project is not None:
        try:
            if dxlink is not None:
                dxfile = dxpy.get_handler(dxlink, project=project)
            else:
                dxfile = dxpy.get_handler(filePath, project=project)
        except:
            try:
                dxlink = dxpy.dxlink(filePath, project=project)
                dxfile = dxpy.get_handler(dxlink)
            except:
                try:
                    proj_id = env_get_current_project_id()
                    dxfile = dxpy.DXFile(filePath, project=proj_id)
                except:
                    sys.stderr.write('WARNING: unable to find file "' + filePath + '": \n')
                    sys.exit(0)  # Do not error on tool run in dx script
    else:
        try:
            if dxlink is not None:
                dxfile = dxpy.get_handler(dxlink)
            else:
                dxfile = dxpy.get_handler(filePath)
        except:
            try:
                dxlink = dxpy.dxlink(filePath)
                dxfile = dxpy.get_handler(dxlink)
            except:
                try:
                    proj_id = env_get_current_project_id()
                    dxfile = dxpy.DXFile(filePath, project=proj_id)
                except:
                    sys.stderr.write('WARNING: unable to find file "' + filePath + '": \n')
                    sys.exit(0)  # Do not error on tool run in dx script

    if dxfile is None:
        sys.stderr.write('WARNING: unable to find file "' + filePath + '": \n')
        sys.exit(0)  # Do not error on tool run in dx script

    return dxfile
Example #41
def new_workflow(args):
    try_call(process_dataobject_args, args)
    try_call(process_single_dataobject_output_args, args)
    init_from = None
    if args.init is not None:
        if is_analysis_id(args.init):
            init_from = args.init
        else:
            init_project, _init_folder, init_result = try_call(resolve_existing_path, args.init, expected="entity")
            init_from = dxpy.get_handler(init_result["id"], project=init_project)
    if args.output is None:
        project = dxpy.WORKSPACE_ID
        folder = get_env_var("DX_CLI_WD", "/")
        name = None
    else:
        project, folder, name = dxpy.utils.resolver.resolve_path(args.output)
    if args.output_folder is not None:
        try:
            # Try to resolve to a path in the project
            _ignore, args.output_folder, _ignore = resolve_path(args.output_folder, expected="folder")
        except:
            # But if not, just use the value directly
            pass
    try:
        dxworkflow = dxpy.new_dxworkflow(
            title=args.title,
            summary=args.summary,
            description=args.description,
            output_folder=args.output_folder,
            project=project,
            name=name,
            tags=args.tags,
            types=args.types,
            hidden=args.hidden,
            properties=args.properties,
            details=args.details,
            folder=folder,
            parents=args.parents,
            init_from=init_from,
        )
        if args.brief:
            print(dxworkflow.get_id())
        else:
            dxpy.utils.describe.print_desc(dxworkflow.describe(incl_properties=True, incl_details=True), args.verbose)
    except:
        err_exit()
Example #42
def main(token):
    # Configure dxpy authentication
    dxpy.set_security_context({'auth_token_type': 'Bearer', 'auth_token': token})

    # Resolve FACTORY_PROJECT by ID
    proj = dxpy.DXProject(FACTORY_PROJECT)
    print 'Resolved project:', proj.describe()['name'], proj.get_id()

    # Set FACTORY_PROJECT as the workspace for subsequent operations
    # (sort of like the current working directory)
    dxpy.set_workspace_id(FACTORY_PROJECT)

    # Resolve the workflow by name. (Could also store ID like the project)
    wf = list(dxpy.search.find_data_objects(classname="workflow", name="RNA-seq pipeline",
                                            return_handler=True))[0]
    print 'Resolved workflow:', wf.describe()['name'], wf.get_id()

    # TODO: Stage the inputs. Here we find them in the IN folder
    left_reads = list(dxpy.search.find_data_objects(classname="file", name="ENCFF001JPX.1k.fastq.gz",
                                                    folder="/IN", return_handler=True))[0]
    print 'Resolved left reads:', left_reads.describe()['name'], left_reads.get_id()
    right_reads = list(dxpy.search.find_data_objects(classname="file", name="ENCFF001JQB.1k.fastq.gz",
                                                     folder="/IN", return_handler=True))[0]
    print 'Resolved right reads:', right_reads.describe()['name'], right_reads.get_id()

    # Launch the workflow
    analysis = wf.run({'0.fastqs': [dxpy.dxlink(left_reads.get_id())],
                       '0.fastq_pairs': [dxpy.dxlink(right_reads.get_id())]})
    print 'Launched analysis:', analysis.get_id()
    print 'Analysis state:', analysis.describe()['state']

    # TODO: Poll for (or come back when) analysis state 'done' or 'failed'.
    # Handle any failures.

    # Cooking-show-style substitution with completed analysis
    analysis = dxpy.DXAnalysis(COMPLETED_ANALYSIS)
    print 'Analysis state:', analysis.describe()['state']

    # Enumerate outputs
    print 'Analysis outputs:'
    for one_output_name, one_output_link in analysis.describe()['output'].iteritems():
        one_output = dxpy.get_handler(one_output_link) # one_output : dxpy.DXFile
        one_file_name = one_output.describe()['name']
        one_file_url, _ = one_output.get_download_url(preauthenticated=True, filename=one_file_name)
        print one_file_name, one_file_url
Example #43
def resolve_dx_file(identifier):
    try:
        handler = dxpy.get_handler(identifier)
    except dxpy.DXError:
        try:
            handler = dxpy.find_one_data_object(
                classname='file',
                name=identifier,
                return_handler=True,
                zero_ok=False,
                more_ok=False)
        except dxpy.DXSearchError:
            logging.error('Failed to resolve control %s to unique dx object.  ID or name does not exist or multiple files of that name were found.' % (str(identifier)))
            return None
        else:
            return handler
    else:
        return handler
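A hypothetical call pattern for resolve_dx_file; the file ID and name below are placeholders:

control = resolve_dx_file('file-xxxx')        # by object ID (placeholder)
if control is None:
    sys.exit(1)
control = resolve_dx_file('control.bam')      # by name, which must be unique (placeholder)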
Example #45
def wait_for_depends_on(depends_on, all_job_outputs):
    # Wait for depends_on and any data objects in the input to close
    if len(depends_on) > 0:
        print(fill('Processing dependsOn and any DNAnexus links to closing objects in the input'))
        for an_id in depends_on:
            try:
                print('  Waiting for ' + an_id + '...')
                if an_id.startswith('localjob'):
                    if all_job_outputs.get(an_id) is None:
                        raise Exception('Job ' + an_id + ' could not be found in local finished jobs')
                elif an_id.startswith('job'):
                    dxjob = dxpy.DXJob(an_id)
                    dxjob.wait_on_done()
                else:
                    handler = dxpy.get_handler(an_id)
                    desc = handler.describe()
                    handler._wait_on_close()
            except Exception as e:
                raise Exception('Could not wait for ' + an_id + ': ' + str(e))
Example #46
def wait_for_depends_on(depends_on, all_job_outputs):
    # Wait for depends_on and any data objects in the input to close
    if len(depends_on) > 0:
        print(fill('Processing dependsOn and any DNAnexus links to closing objects in the input'))
        for an_id in depends_on:
            try:
                print('  Waiting for ' + an_id + '...')
                if an_id.startswith('localjob'):
                    if all_job_outputs.get(an_id) is None:
                        raise Exception('Job ' + an_id + ' could not be found in local finished jobs')
                elif an_id.startswith('job'):
                    dxjob = dxpy.DXJob(an_id)
                    dxjob.wait_on_done()
                else:
                    handler = dxpy.get_handler(an_id)
                    desc = handler.describe()
                    handler._wait_on_close()
            except Exception as e:
                raise Exception('Could not wait for ' + an_id + ': ' + str(e))
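Hypothetical usage, with placeholder IDs: block until a platform job and a data object have finished/closed before proceeding.

wait_for_depends_on(['job-xxxx', 'file-xxxx'], all_job_outputs={})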
Example #47
def new_workflow(args):
    try_call(process_dataobject_args, args)
    try_call(process_single_dataobject_output_args, args)
    init_from = None
    if args.init is not None:
        try:
            init_project, init_folder, init_result = try_call(resolve_existing_path,
                                                              args.init,
                                                              expected='entity')
            init_from = dxpy.get_handler(init_result['id'], project=init_project)
        except:
            init_from = args.init
    if args.output is None:
        project = dxpy.WORKSPACE_ID
        folder = os.environ.get('DX_CLI_WD', '/')
        name = None
    else:
        project, folder, name = dxpy.utils.resolver.resolve_path(args.output)
    if args.output_folder is not None:
        try:
            # Try to resolve to a path in the project
            ignore, args.output_folder, ignore2 = resolve_path(args.output_folder, expected='folder')
        except:
            # But if not, just use the value directly
            pass
    try:
        dxworkflow = dxpy.new_dxworkflow(title=args.title, summary=args.summary,
                                         description=args.description,
                                         output_folder=args.output_folder,
                                         project=project, name=name,
                                         tags=args.tags, types=args.types,
                                         hidden=args.hidden, properties=args.properties,
                                         details=args.details,
                                         folder=folder,
                                         parents=args.parents, init_from=init_from)
        if args.brief:
            print(dxworkflow.get_id())
        else:
            dxpy.utils.describe.print_desc(dxworkflow.describe(incl_properties=True, incl_details=True),
                                           args.verbose)
    except:
        err_exit()
Example #48
            def check_file_content(out_param_name, out_filename, tmp_fname, str_content):
                """
                Download a file, read it from local disk, and verify that it has the correct contents
                """
                if out_param_name not in job_output:
                    raise Exception("Error: key {} does not appear in the job output".format(out_param_name))
                dxlink = job_output[out_param_name]

                # check that the filename gets preserved
                trg_fname = dxpy.get_handler(dxlink).name
                self.assertEqual(trg_fname, out_filename)

                # download the file and check the contents
                silent_file_remove(tmp_fname)
                dxpy.download_dxfile(dxlink, tmp_fname)
                with open(tmp_fname, "r") as fh:
                    data = fh.read()
                    print(data)
                    if not (strip_white_space(data) == strip_white_space(str_content)):
                        raise Exception("contents of file {} do not match".format(out_param_name))
                silent_file_remove(tmp_fname)
Example #49
    def test_dxfs_operations(self):
        # FIXME: Make the mount live or add command to refresh it with remote changes
        #subprocess.check_call(['dx', 'mkdir', 'foo'])
        #subprocess.check_call(['dx', 'mkdir', 'bar'])
        #subprocess.check_call(['dx', 'mkdir', '-p', '/bar/baz'])

        self.assertEqual(set(os.listdir(self.mountpoint)), set(['foo', 'bar', os.path.basename(__file__)]))
        
        # Reading
        self.assertEqual(open(__file__).read(), open(os.path.join(self.mountpoint, __file__)).read())
        
        # Moving
        shutil.move(os.path.join(self.mountpoint, __file__), os.path.join(self.mountpoint, __file__+"2"))
        self.assertEqual(set(os.listdir(self.mountpoint)), set(['foo', 'bar', os.path.basename(__file__+"2")]))
        shutil.move(os.path.join(self.mountpoint, __file__+"2"), os.path.join(self.mountpoint, "foo"))
        self.assertEqual(set(os.listdir(os.path.join(self.mountpoint, 'foo'))), set([os.path.basename(__file__+"2")]))
        folder_listing = self.project.list_folder('/foo')
        self.assertEqual(len(folder_listing['folders']), 0)
        self.assertEqual(len(folder_listing['objects']), 1)
        self.assertEqual(dxpy.get_handler(folder_listing['objects'][0]['id']).name, os.path.basename(__file__+"2"))
        self.assertEqual(open(__file__).read(), open(os.path.join(self.mountpoint, 'foo', __file__+"2")).read())
Example #50
def list_subfolders(project, path, recurse=True):
    '''
    :param project: Project ID to use as context for the listing
    :type project: string
    :param path: Subtree root path
    :type path: string
    :param recurse: Return a complete subfolders tree
    :type recurse: boolean

    Returns a generator of subfolders of the remote *path* (which is included in the result) in *project*.

    Example::

        list_subfolders("project-xxxx", folder="/input")

    '''
    project_folders = dxpy.get_handler(project).describe(input_params={'folders': True})['folders']
    # TODO: support shell-style path globbing (i.e. /a*/c matches /ab/c but not /a/b/c)
    # return pathmatch.filter(project_folders, os.path.join(path, '*'))
    if recurse:
        return (f for f in project_folders if f.startswith(path))
    else:
        return (f for f in project_folders if f.startswith(path) and '/' not in f[len(path)+1:])
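Note that the function returns generators, so a caller would typically materialize them; the project ID and paths below are placeholders:

all_subfolders = list(list_subfolders('project-xxxx', '/input'))
direct_children = list(list_subfolders('project-xxxx', '/input', recurse=False))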
Example #51
def _find(api_method, query, limit, return_handler, **kwargs):
    ''' Takes an API method handler (dxpy.api.find...) and calls it with *query*, then wraps a generator around its
    output. Used by the methods below.
    '''
    num_results = 0

    while True:
        resp = api_method(query, **kwargs)

        for i in resp["results"]:
            if num_results == limit:
                return  # PEP 479: raising StopIteration inside a generator is an error in Python 3.7+
            num_results += 1
            if return_handler:
                handler = dxpy.get_handler(i['id'], project=i.get('project'))
                yield handler
            else:
                yield i

        # set up next query
        if resp["next"] is not None:
            query["starting"] = resp["next"]
        else:
            return
Example #52
def _find(api_method, query, limit, return_handler, **kwargs):
    """ Takes an API method handler (dxpy.api.find...) and calls it with *query*, then wraps a generator around its
    output. Used by the methods below.
    """
    num_results = 0

    while True:
        resp = api_method(query, **kwargs)

        for i in resp["results"]:
            if num_results == limit:
                return  # PEP 479: raising StopIteration inside a generator is an error in Python 3.7+
            num_results += 1
            if return_handler:
                handler = dxpy.get_handler(i["id"], project=i.get("project"))
                yield handler
            else:
                yield i

        # set up next query
        if resp["next"] is not None:
            query["starting"] = resp["next"]
        else:
            return
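A minimal sketch (not the actual dxpy wrapper) of how a public find helper might drive _find: assemble the query dict and let the generator above handle paging. The query fields shown ("class", "name", "scope") follow the /system/findDataObjects API; find_files_named itself is a made-up name.

def find_files_named(name, project=None, limit=None, return_handler=False):
    # Route a simple name query through _find, which pages transparently.
    query = {"class": "file", "name": name}
    if project is not None:
        query["scope"] = {"project": project}
    return _find(dxpy.api.system_find_data_objects, query, limit, return_handler)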
Example #53
def upload_resources(src_dir,
                     project=None,
                     folder='/',
                     ensure_upload=False,
                     force_symlinks=False):
    """
    :param ensure_upload: If True, will bypass checksum of resources directory
                          and upload resources bundle unconditionally;
                          will NOT be able to reuse this bundle in future builds.
                          Else if False, will compute checksum and upload bundle
                          if checksum is different from a previously uploaded
                          bundle's checksum.
    :type ensure_upload: boolean
    :param force_symlinks: If true, will bypass the attempt to dereference any
                           non-local symlinks and will unconditionally include
                           the link as-is.  Note that this will almost certainly
                           result in a broken link within the resource directory
                           unless you really know what you're doing.
    :type force_symlinks: boolean
    :returns: A list (possibly empty) of references to the generated archive(s)
    :rtype: list

    If it exists, archives and uploads the contents of the
    ``resources/`` subdirectory of *src_dir* to a new remote file
    object, and returns a list describing a single bundled dependency in
    the form expected by the ``bundledDepends`` field of a run
    specification. Returns an empty list, if no archive was created.
    """
    applet_spec = _get_applet_spec(src_dir)

    if project is None:
        dest_project = applet_spec['project']
    else:
        dest_project = project
        applet_spec['project'] = project

    resources_dir = os.path.join(src_dir, "resources")
    if os.path.exists(resources_dir) and len(os.listdir(resources_dir)) > 0:
        target_folder = applet_spec[
            'folder'] if 'folder' in applet_spec else folder

        # While creating the resource bundle, optimistically look for a
        # resource bundle with the same contents, and reuse it if possible.
        # The resource bundle carries a property 'resource_bundle_checksum'
        # that indicates the checksum; the way in which the checksum is
        # computed is given below. If the checksum matches (and
        # ensure_upload is False), then we will reuse the existing file;
        # otherwise, we will compress and upload the tarball.

        # The input to the SHA1 contains entries of the form (whitespace
        # only included here for readability):
        #
        # / \0 MODE \0 MTIME \0
        # /foo \0 MODE \0 MTIME \0
        # ...
        #
        # where there is one entry for each directory or file (order is
        # specified below), followed by a numeric representation of the
        # mode, and the mtime in milliseconds since the epoch.
        #
        # Note that when looking at a link: if the link is to be dereferenced,
        # the mtime and mode used are those of the target (using os.stat());
        # if the link is to be kept as a link, the mtime and mode are those
        # of the link itself (using os.lstat()).

        with tempfile.NamedTemporaryFile(suffix=".tar") as tar_tmp_fh:

            output_sha1 = hashlib.sha1()
            tar_fh = tarfile.open(fileobj=tar_tmp_fh, mode='w')

            for dirname, subdirs, files in os.walk(resources_dir):
                if not dirname.startswith(resources_dir):
                    raise AssertionError(
                        'Expected %r to start with root directory %r' %
                        (dirname, resources_dir))

                # Add an entry for the directory itself
                relative_dirname = dirname[len(resources_dir):]
                dir_stat = os.lstat(dirname)
                if not relative_dirname.startswith('/'):
                    relative_dirname = '/' + relative_dirname

                fields = [
                    relative_dirname,
                    str(_fix_perms(dir_stat.st_mode)),
                    str(int(dir_stat.st_mtime * 1000))
                ]
                output_sha1.update(b''.join(
                    s.encode('utf-8') + b'\0' for s in fields))

                # add an entry in the tar file for the current directory, but
                # do not recurse!
                tar_fh.add(dirname,
                           arcname='.' + relative_dirname,
                           recursive=False,
                           filter=_fix_perm_filter)

                # Canonicalize the order of subdirectories; this is the order in
                # which they will be visited by os.walk
                subdirs.sort()

                # check the subdirectories for symlinks.  We should throw an error
                # if there are any links that point outside of the directory (unless
                # --force-symlinks is given).  If a link is pointing internal to
                # the directory (or --force-symlinks is given), we should add it
                # as a file.
                for subdir_name in subdirs:
                    dir_path = os.path.join(dirname, subdir_name)

                    # If we do have a symlink,
                    if os.path.islink(dir_path):
                        # Let's get the pointed-to path to ensure that it is
                        # still in the directory
                        link_target = os.readlink(dir_path)

                        # If this is a local link, add it to the list of files (case 1)
                        # else raise an error
                        if force_symlinks or is_link_local(link_target):
                            files.append(subdir_name)
                        else:
                            raise AppBuilderException(
                                "Cannot include symlinks to directories outside of the resource directory.  '%s' points to directory '%s'"
                                % (dir_path, os.path.realpath(dir_path)))

                # Canonicalize the order of files so that we compute the
                # checksum in a consistent order
                for filename in sorted(files):
                    deref_link = False

                    relative_filename = os.path.join(relative_dirname,
                                                     filename)
                    true_filename = os.path.join(dirname, filename)

                    file_stat = os.lstat(true_filename)
                    # check for a link here, please!
                    if os.path.islink(true_filename):

                        # Get the pointed-to path
                        link_target = os.readlink(true_filename)

                        if not (force_symlinks or is_link_local(link_target)):
                            # if we are pointing outside of the directory, then:
                            # try to get the true stat of the file and make sure
                            # to dereference the link!
                            try:
                                file_stat = os.stat(
                                    os.path.join(dirname, link_target))
                                deref_link = True
                            except OSError:
                                # uh-oh! looks like we have a broken link!
                                # since this is guaranteed to cause problems (and
                                # we know we're not forcing symlinks here), we
                                # should throw an error
                                raise AppBuilderException(
                                    "Broken symlink: Link '%s' points to '%s', which does not exist"
                                    % (true_filename,
                                       os.path.realpath(true_filename)))

                    fields = [
                        relative_filename,
                        str(_fix_perms(file_stat.st_mode)),
                        str(int(file_stat.st_mtime * 1000))
                    ]
                    output_sha1.update(b''.join(
                        s.encode('utf-8') + b'\0' for s in fields))

                    # If we are to dereference, use the target fn
                    if deref_link:
                        true_filename = os.path.realpath(true_filename)

                    tar_fh.add(true_filename,
                               arcname='.' + relative_filename,
                               filter=_fix_perm_filter)

                # end for filename in sorted(files)

            # end for dirname, subdirs, files in os.walk(resources_dir):

            # at this point, the tar is complete, so close the tar_fh
            tar_fh.close()

            # Optimistically look for a resource bundle with the same
            # contents, and reuse it if possible. The resource bundle
            # carries a property 'resource_bundle_checksum' that indicates
            # the checksum; the way in which the checksum is computed is
            # given in the documentation of _directory_checksum.

            if ensure_upload:
                properties_dict = {}
                existing_resources = False
            else:
                directory_checksum = output_sha1.hexdigest()
                properties_dict = dict(
                    resource_bundle_checksum=directory_checksum)
                existing_resources = dxpy.find_one_data_object(
                    project=dest_project,
                    folder=target_folder,
                    properties=dict(
                        resource_bundle_checksum=directory_checksum),
                    visibility='either',
                    zero_ok=True,
                    state='closed',
                    return_handler=True)

            if existing_resources:
                logger.info(
                    "Found existing resource bundle that matches local resources directory: "
                    + existing_resources.get_id())

                dx_resource_archive = existing_resources
            else:

                logger.debug("Uploading in " + src_dir)
                # We need to compress the tar that we've created

                targz_fh = tempfile.NamedTemporaryFile(suffix=".tar.gz",
                                                       delete=False)

                # compress the file by reading the tar file and passing
                # it though a GzipFile object, writing the given
                # block size (by default 8192 bytes) at a time
                targz_gzf = gzip.GzipFile(fileobj=targz_fh, mode='wb')
                tar_tmp_fh.seek(0)
                dat = tar_tmp_fh.read(io.DEFAULT_BUFFER_SIZE)
                while dat:
                    targz_gzf.write(dat)
                    dat = tar_tmp_fh.read(io.DEFAULT_BUFFER_SIZE)

                targz_gzf.flush()
                targz_gzf.close()
                targz_fh.close()

                if 'folder' in applet_spec:
                    try:
                        dxpy.get_handler(dest_project).new_folder(
                            applet_spec['folder'], parents=True)
                    except dxpy.exceptions.DXAPIError:
                        pass  # TODO: make this better

                dx_resource_archive = dxpy.upload_local_file(
                    targz_fh.name,
                    wait_on_close=True,
                    project=dest_project,
                    folder=target_folder,
                    hidden=True,
                    properties=properties_dict)

                os.unlink(targz_fh.name)

                # end compressed file creation and upload

            archive_link = dxpy.dxlink(dx_resource_archive.get_id())

        # end tempfile.NamedTemporaryFile(suffix=".tar") as tar_fh

        return [{'name': 'resources.tar.gz', 'id': archive_link}]
    else:
        return []
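The checksum entry format described in the comments above can be illustrated with a standalone sketch; this assumed version omits the _fix_perms() permission normalization and the symlink handling of the real code.

import hashlib
import os

def sketch_directory_checksum(root):
    # One (path, mode, mtime-in-ms) triple per entry, each field followed by
    # '\0': the directory itself first, then its files in sorted order.
    sha1 = hashlib.sha1()
    for dirname, subdirs, files in os.walk(root):
        subdirs.sort()  # canonicalize the visit order of os.walk
        for path in [dirname] + [os.path.join(dirname, f) for f in sorted(files)]:
            st = os.lstat(path)
            rel = '/' + os.path.relpath(path, root).replace(os.sep, '/')
            if rel == '/.':
                rel = '/'  # the resources root itself
            for field in (rel, str(st.st_mode), str(int(st.st_mtime * 1000))):
                sha1.update(field.encode('utf-8') + b'\0')
    return sha1.hexdigest()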
Example #54
def upload_resources(src_dir, project=None, folder='/'):
    """
    :returns: A list (possibly empty) of references to the generated archive(s)
    :rtype: list

    If it exists, archives and uploads the contents of the
    ``resources/`` subdirectory of *src_dir* to a new remote file
    object, and returns a list describing a single bundled dependency in
    the form expected by the ``bundledDepends`` field of a run
    specification. Returns an empty list, if no archive was created.
    """
    applet_spec = _get_applet_spec(src_dir)

    if project is None:
        dest_project = applet_spec['project']
    else:
        dest_project = project
        applet_spec['project'] = project

    resources_dir = os.path.join(src_dir, "resources")
    if os.path.exists(resources_dir) and len(os.listdir(resources_dir)) > 0:
        target_folder = applet_spec[
            'folder'] if 'folder' in applet_spec else folder

        # Optimistically look for a resource bundle with the same
        # contents, and reuse it if possible. The resource bundle
        # carries a property 'resource_bundle_checksum' that indicates
        # the checksum; the way in which the checksum is computed is
        # given in the documentation of _directory_checksum.
        directory_checksum = _directory_checksum(resources_dir)
        existing_resources = dxpy.find_one_data_object(
            project=dest_project,
            folder=target_folder,
            properties=dict(resource_bundle_checksum=directory_checksum),
            visibility='either',
            zero_ok=True,
            state='closed',
            return_handler=True)

        if existing_resources:
            logger.info(
                "Found existing resource bundle that matches local resources directory: "
                + existing_resources.get_id())

            dx_resource_archive = existing_resources
        else:
            logger.debug("Uploading in " + src_dir)

            with tempfile.NamedTemporaryFile(suffix=".tar.gz") as tar_fh:
                # The directory contents may have changed since the
                # first time we checksummed the directory. Ideally we
                # would extract the tar file to determine the checksum
                # of the actually archived files, but maybe this is a
                # little too paranoid.
                subprocess.check_call(
                    ['tar', '-C', resources_dir, '-czf', tar_fh.name, '.'])
                if 'folder' in applet_spec:
                    try:
                        dxpy.get_handler(dest_project).new_folder(
                            applet_spec['folder'], parents=True)
                    except dxpy.exceptions.DXAPIError:
                        pass  # TODO: make this better
                dx_resource_archive = dxpy.upload_local_file(
                    tar_fh.name,
                    wait_on_close=True,
                    project=dest_project,
                    folder=target_folder,
                    hidden=True,
                    properties=dict(
                        resource_bundle_checksum=directory_checksum))

        archive_link = dxpy.dxlink(dx_resource_archive.get_id())
        return [{'name': 'resources.tar.gz', 'id': archive_link}]
    else:
        return []
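For reference, the returned entry is meant to drop into a run specification's bundledDepends array; dxpy.dxlink() produces the {"$dnanexus_link": ...} form expected there. A sketch with a placeholder file ID:

run_spec = {
    # ... other runSpec fields ...
    "bundledDepends": [
        {"name": "resources.tar.gz", "id": {"$dnanexus_link": "file-xxxx"}}
    ],
}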
Example #55
def path_completer(text, expected=None, classes=None, perm_level=None,
                   include_current_proj=False, typespec=None, visibility=None):
    '''
    :param text: String to tab-complete to a path matching the syntax project-name:folder/entity_or_folder_name
    :type text: string
    :param expected: "folder", "entity", "project", or None (no restriction) as to the types of answers to look for
    :type expected: string
    :param classes: if expected="entity", the possible data object classes that are acceptable
    :type classes: list of strings
    :param perm_level: the minimum permissions level required, e.g. "VIEW" or "CONTRIBUTE"
    :type perm_level: string
    :param include_current_proj: Indicate whether the current project's name should be a potential result
    :type include_current_proj: boolean
    :param visibility: Visibility with which to restrict the completion (one of "either", "visible", or "hidden") (default behavior is dependent on *text*)

    Returns a list of matches to the text and restricted by the
    requested parameters.
    '''

    colon_pos = get_last_pos_of_char(':', text)
    slash_pos = get_last_pos_of_char('/', text)
    delim_pos = max(colon_pos, slash_pos)

    # First get projects if necessary
    matches = []
    if expected == 'project' and colon_pos > 0 and colon_pos == len(text) - 1:
        if dxpy.find_one_project(zero_ok=True, name=text[:colon_pos]) is not None:
            return [text + " "]

    if colon_pos < 0 and slash_pos < 0:
        # Might be tab-completing a project, but don't ever include
        # whatever's set as dxpy.WORKSPACE_ID unless expected == "project"
        # Also, don't bother if text=="" and expected is NOT "project"
        # Also, add space if expected == "project"
        if text != "" or expected == 'project':
            results = dxpy.find_projects(describe=True, level=perm_level)
            if not include_current_proj:
                results = [r for r in results if r['id'] != dxpy.WORKSPACE_ID]
            matches += [escape_colon(r['describe']['name'])+':' for r in results if r['describe']['name'].startswith(text)]

    if expected == 'project':
        return matches

    # Attempt to tab-complete to a folder or data object name
    if colon_pos < 0 and slash_pos >= 0:
        # Not tab-completing a project, and the project is unambiguous
        # (use dxpy.WORKSPACE_ID)
        if dxpy.WORKSPACE_ID is not None:
            # try/except block in case dxpy.WORKSPACE_ID is garbage
            try:
                dxproj = dxpy.get_handler(dxpy.WORKSPACE_ID)
                folderpath, entity_name = clean_folder_path(text)
                matches += get_folder_matches(text, slash_pos, dxproj, folderpath)
                if expected != 'folder':
                    if classes is not None:
                        for classname in classes:
                            matches += get_data_matches(text, slash_pos, dxproj,
                                                        folderpath, classname=classname,
                                                        typespec=typespec,
                                                        visibility=visibility)
                    else:
                        matches += get_data_matches(text, slash_pos, dxproj,
                                                    folderpath, typespec=typespec,
                                                    visibility=visibility)
            except:
                pass
    else:
        # project is given by a path, but attempt to resolve to an
        # object or folder anyway
        try:
            proj_ids, folderpath, entity_name = resolve_path(text, multi_projects=True)
        except ResolutionError as details:
            sys.stderr.write("\n" + fill(str(details)))
            return matches
        for proj in proj_ids:
            # protects against dxpy.WORKSPACE_ID being garbage
            try:
                dxproj = dxpy.get_handler(proj)
                matches += get_folder_matches(text, delim_pos, dxproj, folderpath)
                if expected != 'folder':
                    if classes is not None:
                        for classname in classes:
                            matches += get_data_matches(text, delim_pos, dxproj,
                                                        folderpath, classname=classname,
                                                        typespec=typespec, visibility=visibility)
                    else:
                        matches += get_data_matches(text, delim_pos, dxproj,
                                                    folderpath, typespec=typespec,
                                                    visibility=visibility)
            except:
                pass
    return matches
Example #56
    def test_sub_jobs(self):
        '''  Tests a bash script that generates sub-jobs '''
        with temporary_project(
                'TestDXBashHelpers.test_app1 temporary project') as dxproj:
            env = update_environ(DX_PROJECT_CONTEXT_ID=dxproj.get_id())

            # Upload some files for use by the applet
            dxpy.upload_string("1234\n", project=dxproj.get_id(), name="A.txt")
            dxpy.upload_string("ABCD\n", project=dxproj.get_id(), name="B.txt")

            # Build the applet, patching in the bash helpers from the
            # local checkout
            applet_id = build_app_with_bash_helpers(
                os.path.join(TEST_APPS, 'with-subjobs'), dxproj.get_id())
            # Run the applet.
            # Since the job creates two sub-jobs, we need to be a bit more sophisticated
            # in order to wait for completion.
            applet_args = ["-ifiles=A.txt", "-ifiles=B.txt"]
            cmd_args = ['dx', 'run', '--yes', '--brief', applet_id]
            cmd_args.extend(applet_args)
            job_id = run(cmd_args, env=env).strip()

            dxpy.DXJob(job_id).wait_on_done()

            # Assertions -- making sure the script worked
            # Assertions to make about the job's output after it is done running:
            # - *first_file* is a file named first_file.txt containing the string:
            #     "contents of first_file"
            # - *final_file* is a file named final_file.txt containing the
            #   concatenation of the two input files in *files*
            print("Test completed successfully, checking file content\n")

            job_handler = dxpy.get_handler(job_id)
            job_output = job_handler.output

            def strip_white_space(_str):
                return ''.join(_str.split())

            def silent_file_remove(filename):
                try:
                    os.remove(filename)
                except OSError:
                    pass

            # The output should include two files, this section verifies that they have
            # the correct data.
            def check_file_content(out_param_name, out_filename, tmp_fname,
                                   str_content):
                """
                Download a file, read it from local disk, and verify that it has the correct contents
                """
                if out_param_name not in job_output:
                    raise Exception(
                        "Error: key {} does not appear in the job output".format(
                            out_param_name))
                dxlink = job_output[out_param_name]

                # check that the filename gets preserved
                trg_fname = dxpy.get_handler(dxlink).name
                self.assertEqual(trg_fname, out_filename)

                # download the file and check the contents
                silent_file_remove(tmp_fname)
                dxpy.download_dxfile(dxlink, tmp_fname)
                with open(tmp_fname, "r") as fh:
                    data = fh.read()
                    print(data)
                    if not (strip_white_space(data)
                            == strip_white_space(str_content)):
                        raise Exception(
                            "contents of file {} do not match".format(
                                out_param_name))
                silent_file_remove(tmp_fname)

            check_file_content('first_file', 'first_file.txt', "f1.txt",
                               "contents of first_file")
            check_file_content('final_file', 'final_file.txt', "f2.txt",
                               "1234ABCD")
Example #57
def interactive_help(in_class, param_desc, prompt):
    is_array = param_desc['class'].startswith("array:")
    print_param_help(param_desc)
    print()
    array_help_str = ', or <ENTER> to finish the list of inputs'
    if in_class in dx_data_classes:
        # Class is some sort of data object
        if dxpy.WORKSPACE_ID is not None:
            proj_name = None
            try:
                proj_name = dxpy.api.project_describe(dxpy.WORKSPACE_ID)['name']
            except:
                pass
            if proj_name is not None:
                print('Your current working directory is ' + proj_name + ':' + dxpy.config.get('DX_CLI_WD', '/'))
        while True:
            print('Pick an option to find input data:')
            try:
                opt_num = pick(['List and choose from available data in the current project',
                                'List and choose from available data in the DNAnexus Reference Genomes project',
                                'Select another project to list and choose available data',
                                'Select an output from a previously-run job (current project only)',
                                'Return to original prompt (specify an ID or path directly)'])
            except KeyboardInterrupt:
                opt_num = 4
            if opt_num == 0:
                query_project = dxpy.WORKSPACE_ID
            elif opt_num == 1:
                query_project = dxpy.find_one_project(name="Reference Genome Files", public=True, billed_to="org-dnanexus", level="VIEW")['id']
            elif opt_num == 2:
                project_generator = dxpy.find_projects(level='VIEW', describe=True, explicit_perms=True)
                print('\nProjects to choose from:')
                query_project = paginate_and_pick(project_generator, (lambda result: result['describe']['name']))['id']
            if opt_num in range(3):
                result_generator = dxpy.find_data_objects(classname=in_class,
                                                          typename=param_desc.get('type'),
                                                          describe=True,
                                                          project=query_project)
                print('\nAvailable data:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_ls_l_desc(result['describe'])))
                if result_choice == 'none found':
                    print('No compatible data found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    return [result_choice['project'] + ':' + result_choice['id']]
            elif opt_num == 3:
                # Select from previous jobs in current project
                result_generator = dxpy.find_jobs(project=dxpy.WORKSPACE_ID,
                                                  describe=True,
                                                  parent_job="none")
                print()
                print('Previously-run jobs to choose from:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_find_executions_string(result['describe'],
                                                                                             has_children=False,
                                                                                             single_result=True)),
                                                  filter_fn=(lambda result: result['describe']['state'] not in ['unresponsive', 'terminating', 'terminated', 'failed']))
                if result_choice == 'none found':
                    print('No jobs found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    if 'output' in result_choice['describe'] and result_choice['describe']['output'] is not None:
                        keys = list(result_choice['describe']['output'].keys())
                    else:
                        exec_handler = dxpy.get_handler(result_choice.get('app', result_choice['applet']))
                        exec_desc = exec_handler.describe()
                        if 'outputSpec' not in exec_desc:
                            # This if block will either continue, return, or raise
                            print('No output spec found for the executable')
                            try:
                                field = input('Output field to use (^C or <ENTER> to cancel): ')
                                if field == '':
                                    continue
                                else:
                                    return [result_choice['id'] + ':' + field]
                            except KeyboardInterrupt:
                                continue
                        else:
                            keys = list(exec_desc['outputSpec'].keys())
                    if len(keys) > 1:
                        print('\nOutput fields to choose from:')
                        field_choice = pick(keys)
                        return [result_choice['id'] + ':' + keys[field_choice]]
                    elif len(keys) == 1:
                        print('Using the only output field: ' + keys[0])
                        return [result_choice['id'] + ':' + keys[0]]
                    else:
                        print('No available output fields')
            else:
                print(fill('Enter an ID or path (<TAB> twice for compatible ' + in_class + 's in current directory)' + (array_help_str if is_array else '')))
                return shlex.split(input(prompt))
    else:
        if in_class == 'boolean':
            if is_array:
                print(fill('Enter "true", "false"' + array_help_str))
            else:
                print(fill('Enter "true" or "false"'))
        elif in_class == 'string' and is_array:
            print(fill('Enter a nonempty string' + array_help_str))
        elif (in_class == 'float' or in_class == 'int') and is_array:
            print(fill('Enter a number' + array_help_str))
        elif in_class == 'hash':
            print(fill('Enter a quoted JSON hash'))
        result = input(prompt)
        if in_class == 'string':
            return [result]
        else:
            return shlex.split(result)
Example #58
def interactive_help(in_class, param_desc, prompt):
    is_array = param_desc['class'].startswith("array:")
    print_param_help(param_desc)
    print()
    array_help_str = ', or <ENTER> to finish the list of inputs'
    if in_class in dx_data_classes:
        # Class is some sort of data object
        if dxpy.WORKSPACE_ID is not None:
            proj_name = None
            try:
                proj_name = dxpy.api.project_describe(dxpy.WORKSPACE_ID)['name']
            except:
                pass
            if proj_name is not None:
                print('Your current working directory is ' + proj_name + ':' + dxpy.config.get('DX_CLI_WD', '/'))
        while True:
            print('Pick an option to find input data:')
            try:
                opt_num = pick(['List and choose from available data in the current project',
                                'List and choose from available data in the DNAnexus Reference Genomes Files project',
                                'Select another project to list and choose available data',
                                'Select an output from a previously-run job (current project only)',
                                'Return to original prompt (specify an ID or path directly)'])
            except KeyboardInterrupt:
                opt_num = 4
            if opt_num == 0:
                query_project = dxpy.WORKSPACE_ID
            elif opt_num == 1:
                region = None
                if dxpy.WORKSPACE_ID:
                    region = dxpy.describe(dxpy.WORKSPACE_ID).get("region")
                query_project = dxpy.find_one_project(name="Reference Genome Files:*", public=True, billed_to="org-dnanexus_apps", level="VIEW", name_mode="glob", region=region)['id']
            elif opt_num == 2:
                project_generator = dxpy.find_projects(level='VIEW', describe=True, explicit_perms=True)
                print('\nProjects to choose from:')
                query_project = paginate_and_pick(project_generator, (lambda result: result['describe']['name']))['id']
            if opt_num in range(3):
                result_generator = dxpy.find_data_objects(classname=in_class,
                                                          typename=param_desc.get('type'),
                                                          describe=dict(fields=get_ls_l_desc_fields()),
                                                          project=query_project)
                print('\nAvailable data:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_ls_l_desc(result['describe'])))
                if result_choice == 'none found':
                    print('No compatible data found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    return [result_choice['project'] + ':' + result_choice['id']]
            elif opt_num == 3:
                # Select from previous jobs in current project
                result_generator = dxpy.find_jobs(project=dxpy.WORKSPACE_ID,
                                                  describe=True,
                                                  parent_job="none")
                print()
                print('Previously-run jobs to choose from:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_find_executions_string(result['describe'],
                                                                                             has_children=False,
                                                                                             single_result=True)),
                                                  filter_fn=(lambda result: result['describe']['state'] not in ['unresponsive', 'terminating', 'terminated', 'failed']))
                if result_choice == 'none found':
                    print('No jobs found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    if 'output' in result_choice['describe'] and result_choice['describe']['output'] is not None:
                        keys = list(result_choice['describe']['output'].keys())
                    else:
                        exec_handler = dxpy.get_handler(result_choice.get('app', result_choice['applet']))
                        exec_desc = exec_handler.describe()
                        if 'outputSpec' not in exec_desc:
                            # This if block will either continue, return, or raise
                            print('No output spec found for the executable')
                            try:
                                field = input('Output field to use (^C or <ENTER> to cancel): ')
                                if field == '':
                                    continue
                                else:
                                    return [result_choice['id'] + ':' + field]
                            except KeyboardInterrupt:
                                continue
                        else:
                            keys = list(exec_desc['outputSpec'].keys())
                    if len(keys) > 1:
                        print('\nOutput fields to choose from:')
                        field_choice = pick(keys)
                        return [result_choice['id'] + ':' + keys[field_choice]]
                    elif len(keys) == 1:
                        print('Using the only output field: ' + keys[0])
                        return [result_choice['id'] + ':' + keys[0]]
                    else:
                        print('No available output fields')
            else:
                print(fill('Enter an ID or path (<TAB> twice for compatible ' + in_class + 's in current directory)' + (array_help_str if is_array else '')))
                return shlex.split(input(prompt))
    else:
        if in_class == 'boolean':
            if is_array:
                print(fill('Enter "true", "false"' + array_help_str))
            else:
                print(fill('Enter "true" or "false"'))
        elif in_class == 'string' and is_array:
            print(fill('Enter a nonempty string' + array_help_str))
        elif (in_class == 'float' or in_class == 'int') and is_array:
            print(fill('Enter a number' + array_help_str))
        elif in_class == 'hash':
            print(fill('Enter a quoted JSON hash'))
        result = input(prompt)
        if in_class == 'string':
            return [result]
        else:
            return shlex.split(result)
Example #59
def main():

    args = parse_args()
    check_input(args)
    run_id = get_run_id(args.run_dir)

    # Set all naming conventions
    REMOTE_RUN_FOLDER = "/" + run_id + "/runs"
    REMOTE_READS_FOLDER = "/" + run_id + "/reads"
    REMOTE_ANALYSIS_FOLDER = "/" + run_id + "/analyses"

    FILE_PREFIX = "run." + run_id + ".lane."

    # Prep log & record names
    lane_info = []

    # If no lanes are specified, set lane to all, otherwise, set to array of lanes
    if not args.num_lanes:
        lanes_to_upload = ["all"]
    else:
        lanes_to_upload = [str(i) for i in range(1, args.num_lanes + 1)]

    for lane in lanes_to_upload:
        lane_prefix = FILE_PREFIX + lane

        lane_info.append({
            "lane": lane,
            "prefix": lane_prefix,
            "log_path": os.path.join(args.log_dir, lane_prefix + ".log"),
            "record_name": lane_prefix + ".upload_sentinel",
            "remote_folder": get_target_folder(REMOTE_RUN_FOLDER, lane),
            "uploaded": False
        })

    # Create upload sentinel for upload, if record already exists, use that
    done_count = 0
    for lane in lane_info:
        lane_num = lane["lane"]
        try:
            old_record = dxpy.find_one_data_object(
                zero_ok=True,
                typename="UploadSentinel",
                name=lane["record_name"],
                project=args.project,
                folder=lane["remote_folder"])
        except dxpy.exceptions.DXSearchError as e:
            raise_error(
                "Encountered an error looking for %s at %s:%s. %s" %
                (lane["record_name"], lane["remote_folder"], args.project, e))

        if old_record:
            lane["dxrecord"] = dxpy.get_handler(old_record["id"],
                                                project=old_record["project"])
            if lane["dxrecord"].describe()["state"] == "closed":
                print_stderr("Run %s, lane %s has already been uploaded" %
                             (run_id, lane_num))
                lane["uploaded"] = True
                done_count += 1
        else:
            properties = {"run_id": run_id, "lanes": lane_num}
            lane["dxrecord"] = dxpy.new_dxrecord(types=["UploadSentinel"],
                                                 project=args.project,
                                                 folder=lane["remote_folder"],
                                                 parents=True,
                                                 name=lane["record_name"],
                                                 properties=properties)

        # upload RunInfo here, before uploading any data, unless it is already uploaded.
        record = lane["dxrecord"]
        properties = record.get_properties()

        runInfo = dxpy.find_one_data_object(zero_ok=True,
                                            name="RunInfo.xml",
                                            project=args.project,
                                            folder=lane["remote_folder"])
        if not runInfo:
            lane["runinfo_file_id"] = upload_single_file(
                args.run_dir + "/RunInfo.xml", args.project,
                lane["remote_folder"], properties)
        else:
            lane["runinfo_file_id"] = runInfo["id"]

        # Upload samplesheet unless samplesheet-delay is specified or it is already uploaded.
        if not args.samplesheet_delay:
            sampleSheet = dxpy.find_one_data_object(
                zero_ok=True,
                name="SampleSheet.csv",
                project=args.project,
                folder=lane["remote_folder"])
            if not sampleSheet:
                lane["samplesheet_file_id"] = upload_single_file(
                    args.run_dir + "/SampleSheet.csv", args.project,
                    lane["remote_folder"], properties)
            else:
                lane["samplesheet_file_id"] = sampleSheet["id"]

    if done_count == len(lane_info):
        print_stderr("EXITING: All lanes already uploaded")
        sys.exit(1)

    seconds_to_wait = (dxpy.utils.normalize_timedelta(args.run_duration) /
                       1000 * args.intervals_to_wait)
    print_stderr("Maximum allowable time for run to complete: %d seconds." %
                 seconds_to_wait)

    initial_start_time = time.time()
    # While loop waiting for RTAComplete.txt or RTAComplete.xml
    while not termination_file_exists(args.novaseq, args.run_dir):
        start_time = time.time()
        run_time = start_time - initial_start_time
        # Fail if run time exceeds total time to wait
        if run_time > seconds_to_wait:
            print_stderr(
                "EXITING: Upload failed. Run did not complete after %d seconds (max wait = %ds)"
                % (run_time, seconds_to_wait))
            sys.exit(1)

        # Loop through all lanes in run directory
        for lane in lane_info:
            lane_num = lane["lane"]
            if lane["uploaded"]:
                continue
            run_sync_dir(lane, args)

        # Wait at least the minimum time interval before running the loop again
        cur_time = time.time()
        diff = cur_time - start_time
        if diff < args.sync_interval:
            print_stderr("Sleeping for %d seconds" %
                         (int(args.sync_interval - diff)))
            time.sleep(int(args.sync_interval - diff))

    # Final synchronization, upload data, set details
    for lane in lane_info:
        if lane["uploaded"]:
            continue
        file_ids = run_sync_dir(lane, args, finish=True)
        record = lane["dxrecord"]
        properties = record.get_properties()
        lane["log_file_id"] = upload_single_file(lane["log_path"],
                                                 args.project,
                                                 lane["remote_folder"],
                                                 properties)

        for file_id in file_ids:
            dxpy.get_handler(file_id,
                             project=args.project).set_properties(properties)
        details = {
            'run_id': run_id,
            'lanes': lane["lane"],
            'upload_thumbnails': str(args.upload_thumbnails).lower(),
            'dnanexus_path': args.project + ":" + lane["remote_folder"],
            'tar_file_ids': file_ids
        }

        # Upload sample sheet here, if samplesheet-delay specified
        if args.samplesheet_delay:
            lane["samplesheet_file_id"] = upload_single_file(
                args.run_dir + "/SampleSheet.csv", args.project,
                lane["remote_folder"], properties)

        # IDs of singly uploaded files (when uploaded successfully)
        if lane.get("log_file_id"):
            details.update({'log_file_id': lane["log_file_id"]})
        if lane.get("runinfo_file_id"):
            details.update({'runinfo_file_id': lane["runinfo_file_id"]})
        if lane.get("samplesheet_file_id"):
            details.update(
                {'samplesheet_file_id': lane["samplesheet_file_id"]})

        record.set_details(details)

        record.close()

    print_stderr("Run %s successfully streamed!" % (run_id))

    downstream_input = {}
    if args.downstream_input:
        try:
            input_dict = json.loads(args.downstream_input)
        except ValueError as e:
            raise_error(
                "Failed to read downstream input as JSON string. %s. %s" %
                (args.downstream_input, e))

        if not isinstance(input_dict, dict):
            raise_error("Expected a dict for downstream input. Got %s." %
                        input_dict)

        for k, v in list(input_dict.items()):
            if not (isinstance(k, str) and
                    (isinstance(v, str) or isinstance(v, dict))):
                raise_error(
                    "Expected (string) key and (string or dict) value pairs for downstream input. Got (%s)%s (%s)%s"
                    % (type(k), k, type(v), v))

            downstream_input[k] = v

    if args.applet:
        # project verified in check_input, assuming no change
        project = dxpy.get_handler(args.project)

        print_stderr("Initiating downstream analysis: given app(let) id %s" %
                     args.applet)

        for info in lane_info:
            lane = info["lane"]
            record = info["dxrecord"]

            # applet verified in check_input, assume no change
            applet = dxpy.get_handler(args.applet)

            # Prepare output folder, if downstream analysis specified
            reads_target_folder = get_target_folder(REMOTE_READS_FOLDER, lane)
            print_stderr("Creating output folder %s" % (reads_target_folder))

            try:
                project.new_folder(reads_target_folder, parents=True)
            except dxpy.DXError as e:
                raise_error("Failed to create new folder %s. %s" %
                            (reads_target_folder, e))

            # Decide on job name (<executable>-<run_id>)
            job_name = applet.title + "-" + run_id

            # Overwrite the applet's upload_sentinel_record input with the record of the incremental upload
            downstream_input["upload_sentinel_record"] = dxpy.dxlink(record)

            # Run specified applet
            job = applet.run(downstream_input,
                             folder=reads_target_folder,
                             project=args.project,
                             name=job_name)

            print_stderr("Initiated job %s from applet %s for lane %s" %
                         (job, args.applet, lane))
    # End of args.applet branch

    # args.workflow and args.applet are mutually exclusive
    elif args.workflow:
        # project verified in check_input, assuming no change
        project = dxpy.get_handler(args.project)

        print_stderr("Initiating downstream analysis: given workflow id %s" %
                     args.workflow)

        for info in lane_info:
            lane = info["lane"]
            record = info["dxrecord"]

            # workflow verified in check_input, assume no change
            workflow = dxpy.get_handler(args.workflow)

            # Prepare the output folder for the downstream analysis
            analyses_target_folder = get_target_folder(REMOTE_ANALYSIS_FOLDER,
                                                       lane)
            print_stderr("Creating output folder %s" %
                         (analyses_target_folder))

            try:
                project.new_folder(analyses_target_folder, parents=True)
            except dxpy.DXError as e:
                raise_error("Failed to create new folder %s. %s" %
                            (analyses_target_folder, e))

            # Decide on job name (<executable>-<run_id>)
            job_name = workflow.title + "-" + run_id

            # Override the workflow's stage-0 upload_sentinel_record input with
            # the record of this incremental upload; workflow inputs are keyed
            # as "<stage index>.<input name>"
            downstream_input["0.upload_sentinel_record"] = dxpy.dxlink(record)

            # Run specified workflow
            analysis = workflow.run(downstream_input,
                                    folder=analyses_target_folder,
                                    project=args.project,
                                    name=job_name)

            print_stderr("Initiated analysis %s from workflow %s for lane %s" %
                         (analysis, args.workflow, lane))

    # End of the args.workflow branch

    if args.script:
        # script has been validated to be executable earlier, assume no change
        try:
            sub.check_call([args.script, args.run_dir])
        except sub.CalledProcessError as e:
            # Note: check_call does not capture output, so e.output is None here
            raise_error("Executable (%s) failed with exit code %d" %
                        (args.script, e.returncode))
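
check_call does not capture the child's output, which is why the error message above can only report the exit code. A minimal sketch of one way to surface the script's stderr, assuming Python 3.7+ and the same script-plus-run-directory interface as above:

import subprocess as sub

def run_post_script(script, run_dir):
    """Run a local post-processing script, surfacing its stderr on failure."""
    # sub.run with capture_output=True collects stdout/stderr, unlike check_call
    proc = sub.run([script, run_dir], capture_output=True, text=True)
    if proc.returncode != 0:
        raise RuntimeError("Executable (%s) failed with exit code %d: %s" %
                           (script, proc.returncode, proc.stderr))
    return proc.stdout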
Example #60
def check_input(args):
    dxpy.set_security_context({
        "auth_token_type": "Bearer",
        "auth_token": args.api_token
    })
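    # set_security_context replaces any credentials picked up from the
    # environment; all subsequent dxpy calls authenticate with this token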

    # Check API token and project context
    try:
        dxpy.get_handler(args.project).describe()
    except dxpy.exceptions.DXAPIError as e:
        if e.name == "InvalidAuthentication":
            raise_error("API token (%s) is not valid. %s" %
                        (args.api_token, e))
        elif e.name == "PermissionDenied":
            raise_error("Project (%s) is not valid. %s" % (args.project, e))
        else:
            raise_error("Error describing project (%s). %s" %
                        (args.project, e))
    except dxpy.exceptions.DXError as e:
        raise_error("Error getting project handler for project (%s). %s" %
                    (args.project, e))

    # Check that chained downstream applet is valid
    if args.applet:
        try:
            dxpy.get_handler(args.applet).describe()
        except dxpy.exceptions.DXAPIError as e:
            raise_error("Unable to resolve applet %s. %s" % (args.applet, e))
        except dxpy.exceptions.DXError as e:
            raise_error("Error getting handler for applet (%s). %s" %
                        (args.applet, e))

    # Check that chained downstream workflow is valid
    if args.workflow:
        try:
            dxpy.get_handler(args.workflow).describe()
        except dxpy.exceptions.DXAPIError as e:
            raise_error("Unable to resolve workflow %s. %s" %
                        (args.workflow, e))
        except dxpy.exceptions.DXError as e:
            raise_error("Error getting handler for workflow (%s). %s" %
                        (args.workflow, e))

    # Check that executable to launch locally is executable
    if args.script:
        if not (os.path.isfile(args.script)
                and os.access(args.script, os.X_OK)):
            raise_error(
                "Executable/script passed by -s: (%s) is not executable" %
                (args.script))

    if not args.dxpy_upload:
        print_stderr("Checking if ua is in $PATH")
        try:
            sub.check_call(['ua', '--version'],
                           stdout=open(os.devnull, 'w'),
                           close_fds=True)
        except (sub.CalledProcessError, OSError):
            raise_error(
                "Upload agent executable 'ua' was not found in the $PATH")

    try:
        # We assume dx_sync_directory.py lives in the same directory as this
        # script; sys.path[0] resolves to the directory of the invoked script
        sub.check_call([
            'python3', '{curr_dir}/dx_sync_directory.py'.format(
                curr_dir=sys.path[0]), '-h'
        ],
                       stdout=open(os.devnull, 'w'),
                       close_fds=True)
    except (sub.CalledProcessError, OSError):
        raise_error(
            "dx_sync_directory.py not found. Please run incremental " +
            "upload from the directory containing incremental_upload.py " +
            "and dx_sync_directory.py")