Exemplo n.º 1
0
    def setUpClass(cls):
        """Obtain the job the tests inspect and block until it is done.

        When ``RUN_JOB_ON_DX`` is truthy, the app named ``app_name`` (alias
        ``9.9.7``) is launched in the project called ``project_name`` with a
        fixed set of input file links; otherwise a handler for an existing
        job ID is created.  In both cases ``wait_on_done()`` is called on the
        resulting job, which is stored on ``cls.job``.
        """
        if not RUN_JOB_ON_DX:
            # Reuse an existing job by its ID instead of launching a new one.
            cls.job = dxpy.DXJob("job-F1JpY9Q0pVj0BgpYBp14f31Q")
        else:
            if not project_name:
                print("'PROJ_NAME' environment variable must be defined!")
                sys.exit(1)
            target_project = dxpy.find_one_project(more_ok=False,
                                                   name=project_name)
            launch_options = {
                "project": target_project["id"],
                "name": "vcfscope-measure on chr21",
                "folder": "/purge/" + app_name,
            }
            # Every input is a link to a fixed file ID within ``project_id``.
            job_inputs = {
                "vcfgz": dxpy.dxlink("file-BkkjFkj098Gb2jZ1Yx533JFv",
                                     project_id),
                "bam": dxpy.dxlink("file-Bkkjj5Q098Gkvkb3Xx5Pxj1J",
                                   project_id),
                "bai": dxpy.dxlink("file-Bkkjj5Q098GzYx2bG5YJ3z34",
                                   project_id),
                "region": dxpy.dxlink("file-Bkkj22Q098Gz5yK1Q955G5gX",
                                      project_id),
            }

            launcher = dxpy.DXApp(name=app_name, alias="9.9.7")
            cls.job = launcher.run(job_inputs, **launch_options)

        cls.job.wait_on_done()
Exemplo n.º 2
0
    def canonical_project(self):
        """Resolve ``self.project`` to a DNAnexus project dxid.

        A value that already is a valid project dxid is returned unchanged.
        Anything else is treated as a project name and searched for at
        ``VIEW`` level.

        Raises:
            MultipleObjectsSameNameError: If the name matches more than one project
            ProjectNotFoundError: If the name matches no project
        """
        if utils.is_valid_dxid(self.project, 'project'):
            return self.project

        search = dict(name=self.project,
                      level='VIEW',
                      zero_ok=True,
                      more_ok=False)
        with _wrap_dx_calls():
            try:
                match = dxpy.find_one_project(**search)
            except DXSearchError as err:
                # zero_ok=True, so a search error here means too many hits.
                message = ('Found more than one project for given name: '
                           '{!r}'.format(self.project))
                raise MultipleObjectsSameNameError(message, err)

        if match is not None:
            return match['id']

        raise ProjectNotFoundError(
            'Found no projects for name: {!r}'.format(self.project))
Exemplo n.º 3
0
def resolve_project(project_name, level=None):
    """Return a ``dxpy.DXProject`` for the project named *project_name*.

    Args:
        project_name: Exact project name to search for.
        level: Passed through to ``dxpy.find_one_project`` as ``level``.

    Returns:
        A ``dxpy.DXProject`` built from the ``id`` of the search result.

    Prints a message and exits the process with status 1 when the lookup
    raises.
    """
    try:
        project = dxpy.find_one_project(name=project_name, name_mode='exact',
                                        level=level, return_handler=False)
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that Ctrl-C
        # and SystemExit are not reported as a missing project.
        print('Could not find 1 and only 1 project named {0}.'.format(project_name))
        sys.exit(1)

    return dxpy.DXProject(project['id'])
Exemplo n.º 4
0
def get_project(projectName, level=None):
    '''Returns the DXProject by name or errors out if not found.

    projectName is matched exactly and level is passed through to
    dxpy.find_one_project.  When the lookup raises, a message is printed and
    the process exits with status 1.
    '''
    try:
        project = dxpy.find_one_project(name=projectName, name_mode='exact',
                                        level=level, return_handler=False)
    except Exception:
        # Not a bare ``except``: Ctrl-C and SystemExit must propagate instead
        # of being reported as a missing project.
        print("Could not find 1 and only 1 project named '" + projectName + "'.")
        sys.exit(1)

    return dxpy.DXProject(project['id'])
def resolve_applets_project():
    """Return the ID of the project named ``ENCODE_CHIP_SEQ_PROJECT_NAME``.

    The name is matched exactly.  When the lookup raises, a message is
    printed and the process exits with status 1.
    """
    # Imported locally so this function does not rely on a module-level import.
    import sys

    try:
        project = dxpy.find_one_project(name=ENCODE_CHIP_SEQ_PROJECT_NAME,
                                        name_mode='exact',
                                        return_handler=False)
    except Exception:
        print('Could not find 1 and only 1 project named {0}.'.format(
            ENCODE_CHIP_SEQ_PROJECT_NAME))
        # Previously execution fell through to the ``return`` below, where
        # ``project`` was never bound, and failed with UnboundLocalError.
        sys.exit(1)

    return project['id']
Exemplo n.º 6
0
def get_project(projectName, level=None):
    '''Returns the DXProject by name or errors out if not found.

    projectName is matched exactly and level is passed through to
    dxpy.find_one_project.  When the lookup raises, a message is printed and
    the process exits with status 1.
    '''
    try:
        project = dxpy.find_one_project(name=projectName,
                                        name_mode='exact',
                                        level=level,
                                        return_handler=False)
    except Exception:
        # Not a bare ``except``: Ctrl-C and SystemExit must propagate instead
        # of being reported as a missing project.
        print("Could not find 1 and only 1 project named '" + projectName + "'.")
        sys.exit(1)

    return dxpy.DXProject(project['id'])
Exemplo n.º 7
0
def resolve_project(project_name, level=None):
    """Return a ``dxpy.DXProject`` for the project named *project_name*.

    Args:
        project_name: Exact project name to search for.
        level: Passed through to ``dxpy.find_one_project`` as ``level``.

    Returns:
        A ``dxpy.DXProject`` built from the ``id`` of the search result.

    Prints a message and exits the process with status 1 when the lookup
    raises.
    """
    try:
        project = dxpy.find_one_project(name=project_name,
                                        name_mode='exact',
                                        level=level,
                                        return_handler=False)
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that Ctrl-C
        # and SystemExit are not reported as a missing project.
        print('Could not find 1 and only 1 project named %s; ' % format(
            project_name))
        sys.exit(1)

    return dxpy.DXProject(project['id'])
Exemplo n.º 8
0
def resolve_project(identifier, privs='r'):
    """Resolve *identifier* (a project name or ID) to a project handler.

    The identifier is first searched for as an exact project name; when no
    project matches, it is handed to ``dxpy.get_handler`` instead.

    Args:
        identifier: Project name or ID.
        privs: ``'w'`` additionally rejects projects whose access level is
            ``VIEW``; any other value skips that check.

    Returns:
        The resolved project handler.

    Raises:
        ValueError: If no handler can be obtained for *identifier*, or if
            ``privs == 'w'`` and the project's access level is ``VIEW``.
    """
    project = dxpy.find_one_project(name=identifier, level='VIEW', name_mode='exact', return_handler=True, zero_ok=True)
    if project is None:
        try:
            project = dxpy.get_handler(identifier)
        except Exception:
            logging.error('Could not find a unique project with name or id %s', identifier)
            raise ValueError(identifier)
    # Describe once and reuse the level for both the log line and the check.
    access_level = project.describe()['level']
    logging.debug('Project %s access level is %s', project.name, access_level)
    if privs == 'w' and access_level == 'VIEW':
        logging.error('Output project %s is read-only', identifier)
        raise ValueError(identifier)
    return project
Exemplo n.º 9
0
def resolve_project(identifier, privs='r'):
    """Resolve *identifier* (a project name or ID) to a project handler.

    The identifier is first searched for as an exact project name; when no
    project matches, it is handed to ``dxpy.get_handler`` instead.

    Args:
        identifier: Project name or ID.
        privs: ``'w'`` additionally rejects projects whose access level is
            ``VIEW``; any other value skips that check.

    Returns:
        The resolved project handler.

    Raises:
        ValueError: If no handler can be obtained for *identifier*, or if
            ``privs == 'w'`` and the project's access level is ``VIEW``.
    """
    project = dxpy.find_one_project(name=identifier, level='VIEW', name_mode='exact', return_handler=True, zero_ok=True)
    if project is None:
        try:
            project = dxpy.get_handler(identifier)
        except Exception:
            logging.error('Could not find a unique project with name or id %s', identifier)
            raise ValueError(identifier)
    # Describe once and reuse the level for both the log line and the check.
    access_level = project.describe()['level']
    logging.debug('Project %s access level is %s', project.name, access_level)
    if privs == 'w' and access_level == 'VIEW':
        logging.error('Output project %s is read-only', identifier)
        raise ValueError(identifier)
    return project
def resolve_project(identifier, privs="r"):
    """Resolve *identifier* (a project name or ID) to a project handler.

    The identifier is first searched for as an exact project name; when no
    project matches, it is handed to ``dxpy.get_handler`` instead.

    Args:
        identifier: Project name or ID.
        privs: ``"w"`` additionally rejects projects whose access level is
            ``VIEW``; any other value skips that check.

    Returns:
        The resolved project handler.

    Raises:
        ValueError: If no handler can be obtained for *identifier*, or if
            ``privs == "w"`` and the project's access level is ``VIEW``.
    """
    project = dxpy.find_one_project(name=identifier, level="VIEW", name_mode="exact", return_handler=True, zero_ok=True)
    if project is None:
        try:
            project = dxpy.get_handler(identifier)
        except Exception:
            logging.error("Could not find a unique project with name or id %s", identifier)
            raise ValueError(identifier)
    # Describe once and reuse the level for both the log line and the check.
    access_level = project.describe()["level"]
    logging.debug("Project %s access level is %s", project.name, access_level)
    if privs == "w" and access_level == "VIEW":
        logging.error("Output project %s is read-only", identifier)
        raise ValueError(identifier)
    return project
Exemplo n.º 11
0
def find_reference_file_by_name(reference_name, project_name):
    '''Return a DX link to the file named reference_name in the project named project_name.

    File handlers are kept in REFERENCE_FILES under the key
    (reference_name, project id).  The printed line starts with '*' when the
    handler was already in that cache.
    '''
    project = dxpy.find_one_project(name=project_name, name_mode='exact', return_handler=False)
    cache_key = (reference_name, project['id'])
    already_cached = cache_key in REFERENCE_FILES
    if not already_cached:
        # Exactly one matching file is required (zero_ok/more_ok are False).
        REFERENCE_FILES[cache_key] = dxpy.find_one_data_object(
            classname="file", name=reference_name,
            project=project['id'],
            recurse=True,
            zero_ok=False, more_ok=False, return_handler=True)

    handler = REFERENCE_FILES[cache_key]
    marker = '*' if already_cached else ''
    print(marker + "Resolved %s to %s" % (reference_name, handler.get_id()))
    return dxpy.dxlink(handler)
Exemplo n.º 12
0
def find_reference_file_by_name(reference_name, project_name):
    '''Return a DX link to the file named reference_name in the project named project_name.

    File handlers are kept in REFERENCE_FILES under the key
    (reference_name, project id), so the data-object search runs only on a
    cache miss.  The project lookup itself runs on every call.
    '''
    project = dxpy.find_one_project(name=project_name,
                                    name_mode='exact',
                                    return_handler=False)
    cache_key = (reference_name, project['id'])
    if cache_key not in REFERENCE_FILES:
        # Exactly one matching file is required (zero_ok/more_ok are False).
        REFERENCE_FILES[cache_key] = dxpy.find_one_data_object(
            classname="file",
            name=reference_name,
            project=project['id'],
            recurse=True,
            zero_ok=False,
            more_ok=False,
            return_handler=True)

    return dxpy.dxlink(REFERENCE_FILES[cache_key])
Exemplo n.º 13
0
    def __dx_find_one_project(self):
        """Look up the DNAnexus project whose name matches the runfolder name.

        Returns:
            The ID of the matching project, or None when the search raises
            DXSearchError.
        """
        # The runfolder name is used as a regexp against project names.
        # With more_ok and zero_ok both False the search raises
        # DXSearchError unless exactly one project matches.
        search_args = {
            'name': self.runfolder,
            'name_mode': 'regexp',
            'more_ok': False,
            'zero_ok': False,
        }
        try:
            match = dxpy.find_one_project(**search_args)
        except dxpy.exceptions.DXSearchError:
            # Swallow the search failure and signal it with None.
            self.logger.debug(
                f'0 or >1 DNAnexus projects found for {self.runfolder}')
            return None

        project_id = match['id']
        self.logger.debug(
            f'{self.runfolder} DNAnexus project: {project_id}')
        return project_id
Exemplo n.º 14
0
    def __init__(self, project, ref_genome="1kg_v37", url_duration=ONE_YEAR):
        """
        :param project: a ``dxpy.DXProject``, a string starting with
            ``"project-"`` (used as a project ID), or any other string
            (looked up as a project name)
        :param ref_genome: stored unchanged as ``self.genome``
        :param url_duration: number of seconds for which the generated URL will be valid
        """
        if not isinstance(project, dxpy.DXProject):
            if project.startswith("project-"):
                project_id = project
            else:
                # Anything else is treated as a project name.
                project_id = dxpy.find_one_project(name=project)["id"]
            project = dxpy.DXProject(project_id)

        assert isinstance(project, dxpy.DXProject)
        self.project = project

        root = Element('Global')
        root.set("name", project.name)
        root.set("version", "1")
        self.Global = root
        self.url_duration = url_duration
        self.genome = ref_genome
Exemplo n.º 15
0
def main():
    """Create and populate a Bismark DNA-ME workflow for one experiment.

    Steps:
      1. Resolve the working project and make sure the experiment folder
         (``/<experiment>``) exists in it.
      2. Pick the source project for input files: the working project when
         ``args.test`` is set, otherwise ``ENCODE_SNAPSHOT_PROJECT``.
      3. Locate the replicate files (single-end via ``args.replicates``,
         paired-end via ``args.pair1`` / ``args.pair2``) with
         ``find_replicates``.
      4. Create the workflow, either in the project named
         ``ENCODE_PUBLIC_PROJECT`` (``args.export``) or in the experiment
         folder of the working project, and pass it to ``populate_workflow``.

    Exits the process when a required replicate option is empty or when no
    matching files are found.
    """
    args = get_args()

    ## resolve projects
    project = resolve_project(ENCODE_DNA_ME_PROJECT_NAME)
    print('Project: ' + project.describe()['name'])
    print('Experiment to analyze: ' + args.experiment)
    experiment_folder = '/' + args.experiment
    if not project_has_folder(project, experiment_folder):
        project.new_folder(experiment_folder)

    #TODO get all replicate ids from encoded DB from ENCSR (args.experiment)
    #TODO error out if ENCSR not found, status not complete etc.
    if args.test:
        source_id = project.get_id()
    else:
        source_id = resolve_project(ENCODE_SNAPSHOT_PROJECT, level='VIEW').get_id()

    ## resolve replicates/fastq inputs
    paired = args.paired
    if not paired:
        # ``not x`` also covers an option that was never given (None).
        if not args.replicates:
            sys.exit('Need to have at least 1 replicate file (unpaired) use -r or --replicates')

        replicates = find_replicates(args.replicates, source_id, project, args.experiment, args.test)
        if not replicates:
            print("No replicates found in project: " + project.name)
            print("Looking for " + ", ".join(args.replicates))
            sys.exit(1)

        dx_reps = {
            'reads': [dxpy.dxlink(r) for r in replicates]
        }
        rnames = '-'.join([r.split('.')[0] for r in args.replicates])
    else:
        if not args.pair1 or not args.pair2:
            sys.exit("Need to have at least 1 replicate in pair1 (--r1/--pair1) and pair2 (--r2/--pair2)")

        pair1reps = find_replicates(args.pair1, source_id, project, args.experiment, args.test)
        if not pair1reps:
            print("No replicates for pair1 found in project: " + project.name)
            print("Looking for " + ", ".join(args.pair1))
            sys.exit(1)

        pair2reps = find_replicates(args.pair2, source_id, project, args.experiment, args.test)
        if not pair2reps:
            print("No replicates for pair2 found in project: " + project.name)
            print("Looking for " + ", ".join(args.pair2))
            sys.exit(1)

        dx_reps = {
            'pair1_reads': [dxpy.dxlink(r) for r in pair1reps],
            'pair2_reads': [dxpy.dxlink(r) for r in pair2reps]
        }
        rnames = '-'.join([r.split('.')[0] for r in args.pair1 + args.pair2])

    gender = args.gender
    organism = args.organism
    #TODO determine paired or gender from ENCSR metadata
    # Now create a new workflow ()
    spec_name = args.experiment + '-' + rnames
    base_title = 'dx_dna_me_'
    base_name = 'ENCODE Bismark DNA-ME pipeline: '
    desc = 'The ENCODE Bismark pipeline for WGBS shotgun methylation analysis for experiment'

    if args.export:
        # The exported workflow is named after the read layout only.
        if paired:
            title = base_title + '_paired_end'
            name = base_name + '(paired-end)'
        else:
            title = base_title + '_single_end'
            name = base_name + '(single-end)'
        project_id = dxpy.find_one_project(name=ENCODE_PUBLIC_PROJECT, name_mode='exact', return_handler=False)['id']
        wf = dxpy.new_dxworkflow(title=title,
                                 name=name,
                                 description=desc,
                                 folder=PUBLIC_FOLDER,
                                 project=project_id)
    else:
        # The per-experiment workflow is named after experiment + replicates.
        # A space now separates the description from the experiment name.
        wf = dxpy.new_dxworkflow(title=base_title + spec_name,
                                 name=base_name + spec_name,
                                 description=desc + ' ' + args.experiment,
                                 folder=experiment_folder,
                                 project=project.get_id())

    populate_workflow(wf, dx_reps, args.experiment, paired, gender, organism, project.id, args.export)
Exemplo n.º 16
0
def main():
    """Create and populate a long RNA-seq workflow for one experiment.

    Resolves the working project, ensures the experiment folder exists,
    finds the replicate files by exact name in the source project (copying
    them into the experiment folder unless ``args.test`` is set), creates
    the workflow and hands it to ``populate_workflow``.

    Exits the process when no replicate names were given or no matching
    files remain after the lookup/copy step.
    """
    args = get_args()
    if len(args.replicates) == 0:
        sys.exit('Need to have at least 1 replicate file.')

    project = resolve_project(ENCODE_DNA_ME_PROJECT_NAME)
    print('Project: ' + project.describe()['name'])
    print('Experiment to analyze: ' + args.experiment)
    experiment_folder = '/' + args.experiment
    if not project_has_folder(project, experiment_folder):
        project.new_folder(experiment_folder)

    #TODO get all replicate ids from encoded DB from ENCSR (args.experiment)
    #TODO error out if ENCSR not found, status not complete etc.
    if args.test:
        source_project = project
    else:
        source_project = resolve_project(ENCODE_SNAPSHOT_PROJECT,
                                         level='VIEW')
    source_id = source_project.get_id()

    # Collect every search hit for every requested replicate name.
    replicates = []
    for replicate_name in args.replicates:
        replicates += dxpy.find_data_objects(classname='file',
                                             name=replicate_name,
                                             name_mode='exact',
                                             project=source_id,
                                             return_handler=False)

    if not args.test:
        replicates = copy_files(replicates, project.get_id(),
                                "/" + args.experiment)

    if not replicates:
        print("No replicates found in project: " + project.name)
        print("Looking for " + ", ".join(args.replicates))
        sys.exit(1)

    #TODO determine paired or gender from ENCSR metadata
    inputs = {
        'rnd_seed': 12345,
        'paired': args.paired,
        'gender': args.gender,
        'organism': args.organism,
        'library_id': args.library,
        'nthreads': args.nthreads,
        'spec_name': args.experiment + '-' + '-'.join(
            [name.split('.')[0] for name in args.replicates]),
    }

    # Now create a new workflow ()
    if args.paired:
        title_suffix, name_suffix = '_paired_end ', '(paired-end) '
    else:
        title_suffix, name_suffix = '_single_end ', '(single-end) '
    title_root = 'dx_long_rna_seq_' + title_suffix
    name_root = 'ENCODE Long RNA Seq: ' + name_suffix
    desc = 'The ENCODE RNA Seq pipeline for long RNAs'
    inputs['stranded'] = bool(args.paired)

    if args.export:
        public_project = dxpy.find_one_project(name=ENCODE_PUBLIC_PROJECT,
                                               name_mode='exact',
                                               return_handler=False)
        workflow_spec = dict(title=title_root,
                             name=name_root,
                             description=desc,
                             folder=PUBLIC_FOLDER,
                             project=public_project['id'])
    else:
        workflow_spec = dict(title=title_root + inputs['spec_name'],
                             name=name_root + inputs['spec_name'],
                             description=desc + ' for experiment:' +
                             args.experiment,
                             folder='/' + args.experiment,
                             project=project.get_id())
    wf = dxpy.new_dxworkflow(**workflow_spec)

    populate_workflow(wf, replicates, args.experiment, inputs, project.id,
                      args.export)
Exemplo n.º 17
0
def main():
    """Create and populate a long RNA-seq workflow for one experiment.

    Resolves the working project, ensures the experiment folder exists,
    finds the replicate files by exact name in the source project (copying
    them into the experiment folder unless ``args.test`` is set), creates
    the workflow and hands it to ``populate_workflow``.

    Exits the process when no replicate names were given or no matching
    files remain after the lookup/copy step.
    """
    args = get_args()
    if len(args.replicates) == 0:
        sys.exit('Need to have at least 1 replicate file.')

    project = resolve_project(ENCODE_DNA_ME_PROJECT_NAME)
    print('Project: ' + project.describe()['name'])
    print('Experiment to analyze: ' + args.experiment)
    experiment_folder = '/' + args.experiment
    if not project_has_folder(project, experiment_folder):
        project.new_folder(experiment_folder)

    #TODO get all replicate ids from encoded DB from ENCSR (args.experiment)
    #TODO error out if ENCSR not found, status not complete etc.
    if args.test:
        source_project = project
    else:
        source_project = resolve_project(ENCODE_SNAPSHOT_PROJECT, level='VIEW')
    source_id = source_project.get_id()

    # Collect every search hit for every requested replicate name.
    replicates = []
    for replicate_name in args.replicates:
        replicates += dxpy.find_data_objects(classname='file', name=replicate_name,
                                             name_mode='exact', project=source_id,
                                             return_handler=False)

    if not args.test:
        replicates = copy_files(replicates, project.get_id(), "/" + args.experiment)

    if not replicates:
        print("No replicates found in project: " + project.name)
        print("Looking for " + ", ".join(args.replicates))
        sys.exit(1)

    #TODO determine paired or gender from ENCSR metadata
    inputs = {
        'rnd_seed': 12345,
        'paired': args.paired,
        'gender': args.gender,
        'organism': args.organism,
        'library_id': args.library,
        'nthreads': args.nthreads,
        'spec_name': args.experiment + '-' + '-'.join([name.split('.')[0] for name in args.replicates]),
    }

    # Now create a new workflow ()
    if args.paired:
        title_suffix, name_suffix = '_paired_end ', '(paired-end) '
    else:
        title_suffix, name_suffix = '_single_end ', '(single-end) '
    title_root = 'dx_long_rna_seq_' + title_suffix
    name_root = 'ENCODE Long RNA Seq: ' + name_suffix
    desc = 'The ENCODE RNA Seq pipeline for long RNAs'
    inputs['stranded'] = bool(args.paired)

    if args.export:
        public_project = dxpy.find_one_project(name=ENCODE_PUBLIC_PROJECT, name_mode='exact', return_handler=False)
        workflow_spec = dict(title=title_root,
                             name=name_root,
                             description=desc,
                             folder=PUBLIC_FOLDER,
                             project=public_project['id'])
    else:
        workflow_spec = dict(title=title_root + inputs['spec_name'],
                             name=name_root + inputs['spec_name'],
                             description=desc + ' for experiment:' + args.experiment,
                             folder='/' + args.experiment,
                             project=project.get_id())
    wf = dxpy.new_dxworkflow(**workflow_spec)

    populate_workflow(wf, replicates, args.experiment, inputs, project.id, args.export)
Exemplo n.º 18
0
def main():
    """Create and populate a Bismark DNA-ME workflow for one experiment.

    Steps:
      1. Resolve the working project and make sure the experiment folder
         (``/<experiment>``) exists in it.
      2. Pick the source project for input files: the working project when
         ``args.test`` is set, otherwise ``ENCODE_SNAPSHOT_PROJECT``.
      3. Locate the replicate files (single-end via ``args.replicates``,
         paired-end via ``args.pair1`` / ``args.pair2``) with
         ``find_replicates``.
      4. Create the workflow, either in the project named
         ``ENCODE_PUBLIC_PROJECT`` (``args.export``) or in the experiment
         folder of the working project, and pass it to ``populate_workflow``.

    Exits the process when a required replicate option is empty or when no
    matching files are found.
    """
    args = get_args()

    ## resolve projects
    project = resolve_project(ENCODE_DNA_ME_PROJECT_NAME)
    print('Project: ' + project.describe()['name'])
    print('Experiment to analyze: ' + args.experiment)
    experiment_folder = '/' + args.experiment
    if not project_has_folder(project, experiment_folder):
        project.new_folder(experiment_folder)

    #TODO get all replicate ids from encoded DB from ENCSR (args.experiment)
    #TODO error out if ENCSR not found, status not complete etc.
    if args.test:
        source_id = project.get_id()
    else:
        source_id = resolve_project(ENCODE_SNAPSHOT_PROJECT,
                                    level='VIEW').get_id()

    ## resolve replicates/fastq inputs
    paired = args.paired
    if not paired:
        # ``not x`` also covers an option that was never given (None).
        if not args.replicates:
            sys.exit(
                'Need to have at least 1 replicate file (unpaired) use -r or --replicates'
            )

        replicates = find_replicates(args.replicates, source_id, project,
                                     args.experiment, args.test)
        if not replicates:
            print("No replicates found in project: " + project.name)
            print("Looking for " + ", ".join(args.replicates))
            sys.exit(1)

        dx_reps = {'reads': [dxpy.dxlink(r) for r in replicates]}
        rnames = '-'.join([r.split('.')[0] for r in args.replicates])
    else:
        if not args.pair1 or not args.pair2:
            sys.exit(
                "Need to have at least 1 replicate in pair1 (--r1/--pair1) and pair2 (--r2/--pair2)"
            )

        pair1reps = find_replicates(args.pair1, source_id, project,
                                    args.experiment, args.test)
        if not pair1reps:
            print("No replicates for pair1 found in project: " + project.name)
            print("Looking for " + ", ".join(args.pair1))
            sys.exit(1)

        pair2reps = find_replicates(args.pair2, source_id, project,
                                    args.experiment, args.test)
        if not pair2reps:
            print("No replicates for pair2 found in project: " + project.name)
            print("Looking for " + ", ".join(args.pair2))
            sys.exit(1)

        dx_reps = {
            'pair1_reads': [dxpy.dxlink(r) for r in pair1reps],
            'pair2_reads': [dxpy.dxlink(r) for r in pair2reps]
        }
        rnames = '-'.join([r.split('.')[0] for r in args.pair1 + args.pair2])

    gender = args.gender
    organism = args.organism
    #TODO determine paired or gender from ENCSR metadata
    # Now create a new workflow ()
    spec_name = args.experiment + '-' + rnames
    base_title = 'dx_dna_me_'
    base_name = 'ENCODE Bismark DNA-ME pipeline: '
    desc = 'The ENCODE Bismark pipeline for WGBS shotgun methylation analysis for experiment'

    if args.export:
        # The exported workflow is named after the read layout only.
        if paired:
            title = base_title + '_paired_end'
            name = base_name + '(paired-end)'
        else:
            title = base_title + '_single_end'
            name = base_name + '(single-end)'
        project_id = dxpy.find_one_project(name=ENCODE_PUBLIC_PROJECT,
                                           name_mode='exact',
                                           return_handler=False)['id']
        wf = dxpy.new_dxworkflow(title=title,
                                 name=name,
                                 description=desc,
                                 folder=PUBLIC_FOLDER,
                                 project=project_id)
    else:
        # The per-experiment workflow is named after experiment + replicates.
        # A space now separates the description from the experiment name.
        wf = dxpy.new_dxworkflow(title=base_title + spec_name,
                                 name=base_name + spec_name,
                                 description=desc + ' ' + args.experiment,
                                 folder=experiment_folder,
                                 project=project.get_id())

    populate_workflow(wf, dx_reps, args.experiment, paired, gender, organism,
                      project.id, args.export)
Exemplo n.º 19
0
def path_completer(text, expected=None, classes=None, perm_level=None,
                   include_current_proj=False, typespec=None, visibility=None):
    '''
    Tab-completes *text* against project names, folders and data objects.

    :param text: String to tab-complete to a path matching the syntax project-name:folder/entity_or_folder_name
    :type text: string
    :param expected: "folder", "entity", "project", or None (no restriction) as to the types of answers to look for
    :type expected: string
    :param classes: if expected="entity", the possible data object classes that are acceptable
    :type classes: list of strings
    :param perm_level: the minimum permissions level required, e.g. "VIEW" or "CONTRIBUTE"
    :type perm_level: string
    :param include_current_proj: Indicate whether the current project's name should be a potential result
    :type include_current_proj: boolean
    :param typespec: passed through unchanged to get_data_matches when completing data object names
    :param visibility: Visibility with which to restrict the completion (one of "either", "visible", or "hidden") (default behavior is dependent on *text*)

    Returns a list of matches to the text and restricted by the
    requested parameters. Errors raised while querying a project for
    folder or data matches are swallowed, so the list may be partial.
    '''

    # Positions of the last ':' and '/' in text; the comparisons below treat
    # a negative value as "character not present".
    colon_pos = get_last_pos_of_char(':', text)
    slash_pos = get_last_pos_of_char('/', text)
    delim_pos = max(colon_pos, slash_pos)

    # First get projects if necessary
    matches = []
    # text ends with ':' and a project is expected: if a project with the
    # name before the colon exists, the only completion is text plus a space.
    if expected == 'project' and colon_pos > 0 and colon_pos == len(text) - 1:
        if dxpy.find_one_project(zero_ok=True, name=text[:colon_pos]) is not None:
            return [text + " "]

    if colon_pos < 0 and slash_pos < 0:
        # Might be tab-completing a project, but don't ever include
        # whatever's set as dxpy.WORKSPACE_ID unless expected == "project"
        # Also, don't bother if text=="" and expected is NOT "project"
        # Also, add space if expected == "project"
        if text != "" or expected == 'project':
            results = dxpy.find_projects(describe=True, level=perm_level)
            if not include_current_proj:
                results = [r for r in results if r['id'] != dxpy.WORKSPACE_ID]
            matches += [escape_colon(r['describe']['name'])+':' for r in results if r['describe']['name'].startswith(text)]

    # Only project names were requested, so skip folder/data completion.
    if expected == 'project':
        return matches

    # Attempt to tab-complete to a folder or data object name
    if colon_pos < 0 and slash_pos >= 0:
        # Not tab-completing a project, and the project is unambiguous
        # (use dxpy.WORKSPACE_ID)
        if dxpy.WORKSPACE_ID is not None:
            # try-catch block in case dxpy.WORKSPACE_ID is garbage
            try:
                dxproj = dxpy.get_handler(dxpy.WORKSPACE_ID)
                folderpath, entity_name = clean_folder_path(text)
                matches += get_folder_matches(text, slash_pos, dxproj, folderpath)
                if expected != 'folder':
                    # One data-object query per acceptable class, or a
                    # single unrestricted query when no classes were given.
                    if classes is not None:
                        for classname in classes:
                            matches += get_data_matches(text, slash_pos, dxproj,
                                                        folderpath, classname=classname,
                                                        typespec=typespec,
                                                        visibility=visibility)
                    else:
                        matches += get_data_matches(text, slash_pos, dxproj,
                                                    folderpath, typespec=typespec,
                                                    visibility=visibility)
            except:
                # Best effort: any failure leaves matches as collected so far.
                pass
    else:
        # project is given by a path, but attempt to resolve to an
        # object or folder anyway
        try:
            proj_ids, folderpath, entity_name = resolve_path(text, multi_projects=True)
        except ResolutionError as details:
            # Report the resolution problem on stderr and return what we have.
            sys.stderr.write("\n" + fill(unicode(details)))
            return matches
        for proj in proj_ids:
            # protects against dxpy.WORKSPACE_ID being garbage
            try:
                dxproj = dxpy.get_handler(proj)
                matches += get_folder_matches(text, delim_pos, dxproj, folderpath)
                if expected != 'folder':
                    # Same class handling as in the workspace branch above,
                    # but anchored at the last ':' or '/' (delim_pos).
                    if classes is not None:
                        for classname in classes:
                            matches += get_data_matches(text, delim_pos, dxproj,
                                                        folderpath, classname=classname,
                                                        typespec=typespec, visibility=visibility)
                    else:
                        matches += get_data_matches(text, delim_pos, dxproj,
                                                    folderpath, typespec=typespec,
                                                    visibility=visibility)
            except:
                # Best effort: a failing project is skipped, the rest are kept.
                pass
    return matches
Exemplo n.º 20
0
def interactive_help(in_class, param_desc, prompt):
    """
    Print help for one input parameter and interactively obtain its value(s).

    For data object classes the user repeatedly picks a source (the current
    project, the reference genomes project, another project, or the output
    of a previously-run job) until an item is chosen or the user falls back
    to typing an ID or path. For every other class a short hint is printed
    (where one is defined) and the value is read from the prompt.

    :param in_class: class of the value being requested; compared against ``dx_data_classes`` and the names "boolean", "string", "float", "int" and "hash"
    :type in_class: string
    :param param_desc: description of the input parameter; its "class" entry is used to detect arrays and its optional "type" entry restricts which data objects are listed
    :type param_desc: dict
    :param prompt: prompt displayed when a value is read directly from the user
    :type prompt: string
    :returns: the chosen or entered value(s): ``project-id:object-id`` for a picked data object, ``job-id:field`` for a picked job output, the raw line for a string, or the shell-split tokens of the line otherwise
    :rtype: list of strings
    """
    is_array = param_desc['class'].startswith("array:")
    print_param_help(param_desc)
    print()
    array_help_str = ', or <ENTER> to finish the list of inputs'
    if in_class in dx_data_classes:
        # Class is some sort of data object
        if dxpy.WORKSPACE_ID is not None:
            proj_name = None
            try:
                proj_name = dxpy.api.project_describe(dxpy.WORKSPACE_ID)['name']
            except Exception:
                # Best effort only: the working-directory banner is purely
                # informational, so a failed describe must not abort the help.
                pass
            if proj_name is not None:
                print('Your current working directory is ' + proj_name + ':' + dxpy.config.get('DX_CLI_WD', '/'))
        while True:
            print('Pick an option to find input data:')
            try:
                opt_num = pick(['List and choose from available data in the current project',
                                'List and choose from available data in the DNAnexus Reference Genomes project',
                                'Select another project to list and choose available data',
                                'Select an output from a previously-run job (current project only)',
                                'Return to original prompt (specify an ID or path directly)'])
            except KeyboardInterrupt:
                # ^C at the menu behaves like "Return to original prompt"
                opt_num = 4
            if opt_num == 0:
                query_project = dxpy.WORKSPACE_ID
            elif opt_num == 1:
                query_project = dxpy.find_one_project(name="Reference Genome Files", public=True, billed_to="org-dnanexus", level="VIEW")['id']
            elif opt_num == 2:
                project_generator = dxpy.find_projects(level='VIEW', describe=True, explicit_perms=True)
                print('\nProjects to choose from:')
                query_project = paginate_and_pick(project_generator, (lambda result: result['describe']['name']))['id']
            if opt_num in range(3):
                # Options 0-2 only differ in which project is searched
                result_generator = dxpy.find_data_objects(classname=in_class,
                                                          typename=param_desc.get('type'),
                                                          describe=True,
                                                          project=query_project)
                print('\nAvailable data:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_ls_l_desc(result['describe'])))
                if result_choice == 'none found':
                    print('No compatible data found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    return [result_choice['project'] + ':' + result_choice['id']]
            elif opt_num == 3:
                # Select from previous jobs in current project
                result_generator = dxpy.find_jobs(project=dxpy.WORKSPACE_ID,
                                                  describe=True,
                                                  parent_job="none")
                print()
                print('Previously-run jobs to choose from:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_find_executions_string(result['describe'],
                                                                                             has_children=False,
                                                                                             single_result=True)),
                                                  filter_fn=(lambda result: result['describe']['state'] not in ['unresponsive', 'terminating', 'terminated', 'failed']))
                if result_choice == 'none found':
                    print('No jobs found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    job_output = result_choice['describe'].get('output')
                    if job_output is not None:
                        # Materialize the keys: they are indexed below, and
                        # dict views are not subscriptable on Python 3.
                        keys = list(job_output)
                    else:
                        # Look up 'applet' only when 'app' is absent, so a
                        # result carrying just 'app' does not raise KeyError.
                        # NOTE(review): find_jobs results may carry these IDs
                        # under result_choice['describe'] instead -- confirm.
                        if 'app' in result_choice:
                            executable_id = result_choice['app']
                        else:
                            executable_id = result_choice['applet']
                        exec_handler = dxpy.get_handler(executable_id)
                        exec_desc = exec_handler.describe()
                        if 'outputSpec' not in exec_desc:
                            # This if block will either continue, return, or raise
                            print('No output spec found for the executable')
                            try:
                                field = input('Output field to use (^C or <ENTER> to cancel): ')
                                if field == '':
                                    continue
                                else:
                                    return [result_choice['id'] + ':' + field]
                            except KeyboardInterrupt:
                                continue
                        else:
                            keys = list(exec_desc['outputSpec'])
                    if len(keys) > 1:
                        print('\nOutput fields to choose from:')
                        field_choice = pick(keys)
                        return [result_choice['id'] + ':' + keys[field_choice]]
                    elif len(keys) == 1:
                        print('Using the only output field: ' + keys[0])
                        return [result_choice['id'] + ':' + keys[0]]
                    else:
                        # Nothing to select; fall through to the menu again
                        print('No available output fields')
            else:
                print(fill('Enter an ID or path (<TAB> twice for compatible ' + in_class + 's in current directory)' + (array_help_str if is_array else '')))
                return shlex.split(input(prompt))
    else:
        if in_class == 'boolean':
            if is_array:
                print(fill('Enter "true", "false"' + array_help_str))
            else:
                print(fill('Enter "true" or "false"'))
        elif in_class == 'string' and is_array:
            print(fill('Enter a nonempty string' + array_help_str))
        elif (in_class == 'float' or in_class == 'int') and is_array:
            print(fill('Enter a number' + array_help_str))
        elif in_class == 'hash':
            print(fill('Enter a quoted JSON hash'))
        result = input(prompt)
        if in_class == 'string':
            # Strings are taken verbatim; everything else is shell-split
            return [result]
        else:
            return shlex.split(result)
Exemplo n.º 21
0
def _extend_with_path_matches(matches, text, delim_pos, dxproj, folderpath,
                              expected, classes, typespec, visibility):
    '''
    Append to *matches* (in place) the folder completions for *text* under
    *folderpath* of *dxproj* and, unless only folders are expected, the
    data object completions as well (one lookup per entry of *classes*,
    or a single unrestricted lookup when *classes* is None).
    '''
    matches.extend(get_folder_matches(text, delim_pos, dxproj, folderpath))
    if expected == 'folder':
        return
    if classes is not None:
        for classname in classes:
            matches.extend(get_data_matches(text, delim_pos, dxproj,
                                            folderpath, classname=classname,
                                            typespec=typespec,
                                            visibility=visibility))
    else:
        matches.extend(get_data_matches(text, delim_pos, dxproj,
                                        folderpath, typespec=typespec,
                                        visibility=visibility))


def path_completer(text, expected=None, classes=None, perm_level=None,
                   include_current_proj=False, typespec=None, visibility=None):
    '''
    :param text: String to tab-complete to a path matching the syntax project-name:folder/entity_or_folder_name
    :type text: string
    :param expected: "folder", "entity", "project", or None (no restriction) as to the types of answers to look for
    :type expected: string
    :param classes: if expected="entity", the possible data object classes that are acceptable
    :type classes: list of strings
    :param perm_level: the minimum permissions level required, e.g. "VIEW" or "CONTRIBUTE"
    :type perm_level: string
    :param include_current_proj: Indicate whether the current project's name should be a potential result
    :type include_current_proj: boolean
    :param typespec: passed through unchanged to the data object lookup (presumably a type constraint on the objects offered -- confirm against get_data_matches)
    :param visibility: Visibility with which to restrict the completion (one of "either", "visible", or "hidden") (default behavior is dependent on *text*)

    Returns a list of matches to the text and restricted by the
    requested parameters.

    Lookup failures for an individual project are deliberately ignored so
    that tab-completion never raises; a path that cannot be resolved is
    reported on stderr and the matches gathered so far are returned.
    '''

    colon_pos = get_last_pos_of_char(':', text)
    slash_pos = get_last_pos_of_char('/', text)
    delim_pos = max(colon_pos, slash_pos)

    # First get projects if necessary
    matches = []
    if expected == 'project' and colon_pos > 0 and colon_pos == len(text) - 1:
        # text is "<name>:" -- if that project exists the completion is done
        if dxpy.find_one_project(zero_ok=True, name=text[:colon_pos]) is not None:
            return [text + " "]

    if colon_pos < 0 and slash_pos < 0:
        # Might be tab-completing a project, but don't ever include
        # whatever's set as dxpy.WORKSPACE_ID unless expected == "project"
        # Also, don't bother if text=="" and expected is NOT "project"
        # Also, add space if expected == "project"
        if text != "" or expected == 'project':
            results = dxpy.find_projects(describe=True, level=perm_level)
            if not include_current_proj:
                results = [r for r in results if r['id'] != dxpy.WORKSPACE_ID]
            matches += [escape_colon(r['describe']['name'])+':' for r in results if r['describe']['name'].startswith(text)]

    if expected == 'project':
        return matches

    # Attempt to tab-complete to a folder or data object name
    if colon_pos < 0 and slash_pos >= 0:
        # Not tab-completing a project, and the project is unambiguous
        # (use dxpy.WORKSPACE_ID)
        if dxpy.WORKSPACE_ID is not None:
            # try-catch block in case dxpy.WORKSPACE_ID is garbage
            try:
                dxproj = dxpy.get_handler(dxpy.WORKSPACE_ID)
                folderpath, _ = clean_folder_path(text)
                _extend_with_path_matches(matches, text, slash_pos, dxproj,
                                          folderpath, expected, classes,
                                          typespec, visibility)
            except Exception:
                # Best effort: completion must not fail, but let
                # KeyboardInterrupt/SystemExit propagate.
                pass
    else:
        # project is given by a path, but attempt to resolve to an
        # object or folder anyway
        try:
            proj_ids, folderpath, _ = resolve_path(text, multi_projects=True)
        except ResolutionError as details:
            sys.stderr.write("\n" + fill(str(details)))
            return matches
        for proj in proj_ids:
            # protects against dxpy.WORKSPACE_ID being garbage
            try:
                dxproj = dxpy.get_handler(proj)
                _extend_with_path_matches(matches, text, delim_pos, dxproj,
                                          folderpath, expected, classes,
                                          typespec, visibility)
            except Exception:
                # Best effort: skip projects that cannot be queried, but let
                # KeyboardInterrupt/SystemExit propagate.
                pass
    return matches
Exemplo n.º 22
0
def interactive_help(in_class, param_desc, prompt):
    """
    Print help for one input parameter and interactively obtain its value(s).

    For data object classes the user repeatedly picks a source (the current
    project, the reference genome files project, another project, or the
    output of a previously-run job) until an item is chosen or the user
    falls back to typing an ID or path. For every other class a short hint
    is printed (where one is defined) and the value is read from the prompt.

    :param in_class: class of the value being requested; compared against ``dx_data_classes`` and the names "boolean", "string", "float", "int" and "hash"
    :type in_class: string
    :param param_desc: description of the input parameter; its "class" entry is used to detect arrays and its optional "type" entry restricts which data objects are listed
    :type param_desc: dict
    :param prompt: prompt displayed when a value is read directly from the user
    :type prompt: string
    :returns: the chosen or entered value(s): ``project-id:object-id`` for a picked data object, ``job-id:field`` for a picked job output, the raw line for a string, or the shell-split tokens of the line otherwise
    :rtype: list of strings
    """
    is_array = param_desc['class'].startswith("array:")
    print_param_help(param_desc)
    print()
    array_help_str = ', or <ENTER> to finish the list of inputs'
    if in_class in dx_data_classes:
        # Class is some sort of data object
        if dxpy.WORKSPACE_ID is not None:
            proj_name = None
            try:
                proj_name = dxpy.api.project_describe(dxpy.WORKSPACE_ID)['name']
            except Exception:
                # Best effort only: the working-directory banner is purely
                # informational, so a failed describe must not abort the help.
                pass
            if proj_name is not None:
                print('Your current working directory is ' + proj_name + ':' + dxpy.config.get('DX_CLI_WD', '/'))
        while True:
            print('Pick an option to find input data:')
            try:
                opt_num = pick(['List and choose from available data in the current project',
                                'List and choose from available data in the DNAnexus Reference Genomes Files project',
                                'Select another project to list and choose available data',
                                'Select an output from a previously-run job (current project only)',
                                'Return to original prompt (specify an ID or path directly)'])
            except KeyboardInterrupt:
                # ^C at the menu behaves like "Return to original prompt"
                opt_num = 4
            if opt_num == 0:
                query_project = dxpy.WORKSPACE_ID
            elif opt_num == 1:
                # Restrict the reference project search to the region of the
                # current workspace when one is set (region=None otherwise).
                region = None
                if dxpy.WORKSPACE_ID:
                    region = dxpy.describe(dxpy.WORKSPACE_ID).get("region")
                query_project = dxpy.find_one_project(name="Reference Genome Files:*", public=True, billed_to="org-dnanexus_apps", level="VIEW", name_mode="glob", region=region)['id']
            elif opt_num == 2:
                project_generator = dxpy.find_projects(level='VIEW', describe=True, explicit_perms=True)
                print('\nProjects to choose from:')
                query_project = paginate_and_pick(project_generator, (lambda result: result['describe']['name']))['id']
            if opt_num in range(3):
                # Options 0-2 only differ in which project is searched
                result_generator = dxpy.find_data_objects(classname=in_class,
                                                          typename=param_desc.get('type'),
                                                          describe=dict(fields=get_ls_l_desc_fields()),
                                                          project=query_project)
                print('\nAvailable data:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_ls_l_desc(result['describe'])))
                if result_choice == 'none found':
                    print('No compatible data found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    return [result_choice['project'] + ':' + result_choice['id']]
            elif opt_num == 3:
                # Select from previous jobs in current project
                result_generator = dxpy.find_jobs(project=dxpy.WORKSPACE_ID,
                                                  describe=True,
                                                  parent_job="none")
                print()
                print('Previously-run jobs to choose from:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_find_executions_string(result['describe'],
                                                                                             has_children=False,
                                                                                             single_result=True)),
                                                  filter_fn=(lambda result: result['describe']['state'] not in ['unresponsive', 'terminating', 'terminated', 'failed']))
                if result_choice == 'none found':
                    print('No jobs found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    job_output = result_choice['describe'].get('output')
                    if job_output is not None:
                        # Materialize the keys: they are indexed below, and
                        # dict views are not subscriptable on Python 3.
                        keys = list(job_output)
                    else:
                        # Look up 'applet' only when 'app' is absent, so a
                        # result carrying just 'app' does not raise KeyError.
                        # NOTE(review): find_jobs results may carry these IDs
                        # under result_choice['describe'] instead -- confirm.
                        if 'app' in result_choice:
                            executable_id = result_choice['app']
                        else:
                            executable_id = result_choice['applet']
                        exec_handler = dxpy.get_handler(executable_id)
                        exec_desc = exec_handler.describe()
                        if 'outputSpec' not in exec_desc:
                            # This if block will either continue, return, or raise
                            print('No output spec found for the executable')
                            try:
                                field = input('Output field to use (^C or <ENTER> to cancel): ')
                                if field == '':
                                    continue
                                else:
                                    return [result_choice['id'] + ':' + field]
                            except KeyboardInterrupt:
                                continue
                        else:
                            keys = list(exec_desc['outputSpec'])
                    if len(keys) > 1:
                        print('\nOutput fields to choose from:')
                        field_choice = pick(keys)
                        return [result_choice['id'] + ':' + keys[field_choice]]
                    elif len(keys) == 1:
                        print('Using the only output field: ' + keys[0])
                        return [result_choice['id'] + ':' + keys[0]]
                    else:
                        # Nothing to select; fall through to the menu again
                        print('No available output fields')
            else:
                print(fill('Enter an ID or path (<TAB> twice for compatible ' + in_class + 's in current directory)' + (array_help_str if is_array else '')))
                return shlex.split(input(prompt))
    else:
        if in_class == 'boolean':
            if is_array:
                print(fill('Enter "true", "false"' + array_help_str))
            else:
                print(fill('Enter "true" or "false"'))
        elif in_class == 'string' and is_array:
            print(fill('Enter a nonempty string' + array_help_str))
        elif (in_class == 'float' or in_class == 'int') and is_array:
            print(fill('Enter a number' + array_help_str))
        elif in_class == 'hash':
            print(fill('Enter a quoted JSON hash'))
        result = input(prompt)
        if in_class == 'string':
            # Strings are taken verbatim; everything else is shell-split
            return [result]
        else:
            return shlex.split(result)
    def _set_dxproject_id(self, latest_project=False):
        """
        Locate the DNAnexus project described by this instance and cache its details.

        Exactly one lookup strategy is used, chosen in this order:

        1. ``self.dx_project_id`` is set: a handler is built directly from that ID
           (the "project-" prefix is added, and stored back, when it is missing).
        2. ``self.dx_project_name`` is set: a single project is searched for by name,
           billing account and whichever of the library_name / seq_run_name /
           seq_lane_index properties are known.
        3. Otherwise: projects are searched for by billing account and those same
           properties alone.

        When a project is found, ``self.dx_project``, ``self.dx_project_id``,
        ``self.dx_project_name``, ``self.dx_project_props`` and ``self.library_name``
        are updated from it. When nothing is found the method returns without
        touching those attributes.

        Args:
            latest_project: `bool`. Only consulted by the property-only search: when
                several projects match, True selects one via
                ``gbsc_dnanexus.utils.select_newest_project`` instead of raising.

        Returns:
            None. Results are stored on the instance.

        Raises:
            `DxMultipleProjectsWithSameLibraryName`: The property-only search matched
                more than one project and ``latest_project`` is False.

            `DxMissingLibraryNameProperty`: The project that was found has no
                'library_name' property.
        """
        # Collect only the search properties that actually have a value.
        search_props = {}
        for prop_key, prop_value in (("library_name", self.library_name),
                                     ("seq_run_name", self.uhts_run_name)):
            if prop_value:
                search_props[prop_key] = prop_value
        if self.sequencing_lane:
            search_props["seq_lane_index"] = str(self.sequencing_lane)

        project = ""
        if self.dx_project_id:
            # Strategy 1: trust the ID, normalising it to the "project-" form.
            if not self.dx_project_id.startswith("project-"):
                self.dx_project_id = "project-" + self.dx_project_id
            project = dxpy.DXProject(dxid=self.dx_project_id)
        elif self.dx_project_name:
            # Strategy 2: name plus properties; zero hits are tolerated.
            hit = dxpy.find_one_project(name=self.dx_project_name,
                                        properties=search_props,
                                        billed_to=self.billing_account_id,
                                        zero_ok=True,
                                        more_ok=False)
            if hit:
                project = dxpy.DXProject(dxid=hit["id"])
        else:
            # Strategy 3: properties only.
            hit_ids = [
                hit["id"]
                for hit in dxpy.find_projects(properties=search_props,
                                              billed_to=self.billing_account_id)
            ]
            if len(hit_ids) == 1:
                project = dxpy.DXProject(dxid=hit_ids[0])
            elif len(hit_ids) > 1:
                if not latest_project:
                    raise DxMultipleProjectsWithSameLibraryName(
                        "Error - Multiple DNAnexus projects have the same value for the library_name property value of {library_name}. The projects are {dx_proj_ids}."
                        .format(library_name=self.library_name,
                                dx_proj_ids=hit_ids))
                # NOTE(review): the result is used below like a project handler
                # (.id / .name) -- confirm that is what select_newest_project returns.
                project = gbsc_dnanexus.utils.select_newest_project(
                    dx_project_ids=hit_ids)

        if not project:
            return

        self.dx_project = project
        self.dx_project_id = project.id
        self.dx_project_name = project.name
        described = dxpy.api.project_describe(
            object_id=project.id,
            input_params={"fields": {"properties": True}})
        self.dx_project_props = described["properties"]
        try:
            self.library_name = self.dx_project_props["library_name"]
        except KeyError:
            raise DxMissingLibraryNameProperty(
                "DNAnexus project {} is missing the library_name property.".format(
                    self.dx_project_name))
Exemplo n.º 24
0
def main():
    """Create a dxWDL release in a DNAnexus project.

    Command-line options:
        --folder   existing release folder to reuse; when omitted a new
                   timestamped folder under /releases is created and populated
        --project  name of the project that receives the release
                   (default "dxWDL")

    Steps: resolve the project, prepare the release folder, look up the
    "dxWDLrt" record and the required library files in that folder, embed
    their IDs (plus the version and project ID) into a copy of the dxWDL
    script written to /tmp/dxWDL, and hand it to upload_script.
    """
    argparser = argparse.ArgumentParser(description="Create a release for dxWDL")
    argparser.add_argument("--folder", help="Release folder that already exists")
    argparser.add_argument("--project", help="Project where to place release", default="dxWDL")
    args = argparser.parse_args()

    # resolve project
    # more_ok=False: an ambiguous project name is an error rather than a guess
    print("resolving project {}".format(args.project))
    project = dxpy.find_one_project(name = args.project, more_ok=False, return_handler=True)

    # Create release folder, if needed
    if args.folder is None:
        # New release: timestamped folder, then build/upload its contents
        folder = time.strftime("/releases/%Y-%m-%d/%H%M%S")
        project.new_folder(folder, parents=True)
        make_prerequisits(project, folder)
        print("Uploading jar files")
        upload_libs(project, folder)
    else:
        # Reuse a folder the caller says already exists; nothing is uploaded here
        folder = args.folder

    # Figure out what the current version is
    version_id = release_version()
    print('version_id="{}"'.format(version_id))

    # The runtime asset is the single record named "dxWDLrt" in the release folder
    print("resolving dxWDL runtime asset")
    asset = dxpy.search.find_one_data_object(classname="record",
                                             project=project.get_id(),
                                             name="dxWDLrt",
                                             folder=folder,
                                             return_handler=True,
                                             more_ok=False)
    print("assetId={}".format(asset.get_id()))

    # Resolve every required library by name within the release folder
    print("resolving jar files -- validation ")
    objs = []
    for lib in required_libs:
        objs.append({ "name" : lib, "folder" : folder })
    descs = list(dxpy.search.resolve_data_objects(objs, project=project.get_id()))
    lib_object_ids=[]
    for d in descs:
        # Only the first resolution result of each library is used
        print(d[0])
        lib_object_ids.append(d[0]["id"])
    print(lib_object_ids)

    # embed configuration information into dxWDL script
    print("Embedding configuration into dxWDL script")
    script = None
    with open(os.path.join(top_dir,'dxWDL'), 'r') as fd:
        script = fd.read()
    # Each "<name> = None" placeholder line in the script is replaced by the
    # concrete value computed above
    script = script.replace('version_id = None\n',
                            'version_id = "{}"\n'.format(version_id))
    script = script.replace('asset_id = None\n',
                            'asset_id = "{}"\n'.format(asset.get_id()))
    script = script.replace('project_id = None\n',
                            'project_id = "{}"\n'.format(project.get_id()))
    script = script.replace('lib_object_ids = None\n',
                            'lib_object_ids = {}\n'.format(lib_object_ids))
    # Clear leftovers from a previous run before writing the configured script
    rm_silent('/tmp/dxWDL')
    rm_silent('/tmp/dxWDL_latest')
    with open('/tmp/dxWDL', 'w') as fd:
        fd.write(script)
    upload_script(project, folder)