Example No. 1
 def format_data_object_reference(item):
     if dxpy.is_dxlink(item):
         # Bare dxlink
         obj_id, proj_id = dxpy.get_dxlink_ids(item)
         return (proj_id + ":" if proj_id else '') + obj_id
     if dxpy.is_dxlink(item.get('value')):
         # value is set
         obj_id, proj_id = dxpy.get_dxlink_ids(item['value'])
         return (proj_id + ":" if proj_id else '') + obj_id + (' (%s)' % item['name'] if item.get('name') else '')
     if item.get('project') and item.get('path'):
         # project and folder path; obj_class (the object class, e.g. "file")
         # must be defined in the enclosing scope
         return item['project'] + ':' + item['path'] + "/" + obj_class + "-*" + (' (%s)' % item['name'] if item.get('name') else '')
     return str(item)
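
A minimal sketch of how this helper might be invoked, using placeholder IDs and an assumed "import dxpy"; the two literals show the bare and project-qualified $dnanexus_link forms that dxpy.is_dxlink recognizes:

 import dxpy

 # Bare link: just the object ID
 bare_link = {"$dnanexus_link": "file-xxxx"}
 # Project-qualified link wrapped in an item dict, as the second branch expects
 qualified_item = {"value": {"$dnanexus_link": {"project": "project-xxxx", "id": "file-xxxx"}},
                   "name": "reads.fastq.gz"}

 print(format_data_object_reference(bare_link))       # file-xxxx
 print(format_data_object_reference(qualified_item))  # project-xxxx:file-xxxx (reads.fastq.gz)
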
Example No. 2
    def unpack_tar(self, tar_file_dxlink):
        '''
        DEV: Eventually integrate dx-toolkit into trajectoread repo so I can 
             transition to using 'dx-download-all-inputs' to handle unpacking
             all input files.
             Pipeline used to store lane file dxids as project properties 
             and then pass to "dx download"
        Description: Download and untar metadata and lane data files 
                     (/Data/Intensities/BaseCalls)
        '''

        if dxpy.is_dxlink(tar_file_dxlink):
            file_handler = dxpy.get_handler(tar_file_dxlink)
            filename = file_handler.name
        else:
            print 'Error: Cannot unpack %s; not a valid DXLink object' % tar_file_dxlink
            sys.exit()

        # get_dxlink_ids returns the ('file-dxid', 'project-dxid') pair, so unpack it once
        file_dxid, project_id = dxpy.get_dxlink_ids(tar_file_dxlink)

        # Download file from DNAnexus objectstore to virtual machine
        dxpy.download_dxfile(dxid=file_dxid,
                             filename=filename,
                             project=project_id)

        # Untar file
        ## DEV: Check if this is even in use anymore; also should have some method for
        ## checking what type of compression was used.
        ## But I don't think this is in use
        command = 'tar -xf %s --owner root --group root --no-same-owner' % filename
        self.createSubprocess(cmd=command, pipeStdout=False)
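
For context, a sketch (placeholder IDs; any real download would also need an authenticated dxpy session) of the link object such a method receives; dxpy.dxlink builds the $dnanexus_link mapping and get_dxlink_ids reverses it:

 import dxpy

 # Placeholder IDs; dxpy.dxlink wraps them in the {"$dnanexus_link": ...} form
 tar_link = dxpy.dxlink("file-xxxx", "project-xxxx")
 assert dxpy.is_dxlink(tar_link)

 file_dxid, project_id = dxpy.get_dxlink_ids(tar_link)
 # file_dxid == "file-xxxx", project_id == "project-xxxx"
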
Example No. 3
 def add_file(iname, subdir, value):
     if not dxpy.is_dxlink(value):
         return
     handler = dxpy.get_handler(value)
     if not isinstance(handler, dxpy.DXFile):
         return
     filename = make_unix_filename(handler.name)
     trg_dir = iname
     if subdir is not None:
         trg_dir = os.path.join(trg_dir, subdir)
     files[iname].append({'trg_fname': os.path.join(trg_dir, filename),
                          'handler': handler,
                          'src_file_id': handler.id})
     dirs.append(trg_dir)
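
add_file depends on files, dirs, and make_unix_filename from its enclosing module; the stand-ins below are assumptions sketched only to show the calling pattern, not the real definitions, and a real run would also need a valid file ID and an authenticated dxpy session:

 import collections
 import os
 import dxpy

 files = collections.defaultdict(list)   # assumed shape: input name -> list of file records
 dirs = []                               # assumed shape: directories to create before download
 make_unix_filename = os.path.basename   # crude placeholder for the real filename sanitizer

 # With a real file ID and credentials, this would queue the file under files["reads"]
 add_file("reads", None, {"$dnanexus_link": "file-xxxx"})
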
Example No. 4
 def _resolve_output(self, value):
     if dxpy.is_dxlink(value):
         dxfile = dxpy.DXFile(value)
         file_id = dxfile.get_id()
         if file_id not in DxWdlExecutor._data_cache:
             # Store each file in a subdirectory named by its ID to avoid
             # naming collisions
             cache_dir = self._dxwdl_cache_dir / file_id
             cache_dir.mkdir(parents=True)
             filename = cache_dir / dxfile.describe()["name"]
             dxpy.download_dxfile(dxfile, filename)
             DxWdlExecutor._data_cache[file_id] = filename
         return DxWdlExecutor._data_cache[file_id]
     elif isinstance(value, dict):
         return {
             key: self._resolve_output(val)
             for key, val in cast(dict, value).items()
         }
     elif isinstance(value, Sequence) and not isinstance(value, str):
         return [self._resolve_output(val) for val in cast(Sequence, value)]
     else:
         return value
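
A sketch of the kind of nested output value this method handles, with placeholder IDs; plain values pass through unchanged, every dxlink is downloaded once into the per-file cache directory, and dicts and sequences are resolved recursively:

 # Hypothetical workflow outputs mixing scalars, lists, and dxlinks
 outputs = {
     "qc_report": {"$dnanexus_link": "file-xxxx"},
     "bams": [{"$dnanexus_link": {"project": "project-xxxx", "id": "file-yyyy"}}],
     "sample_count": 3,
 }
 # resolved = executor._resolve_output(outputs)  # requires a configured DxWdlExecutor
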
Example No. 5
def main():

    args = get_args()
    if args.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    authid, authpw, server = common.processkey(args.key, args.keyfile)
    keypair = (authid, authpw)

    if args.query:
        r = requests.get(
            args.query, auth=keypair, headers={"content-type": "application/json", "accept": "application/json"}
        )
        experiments = r.json()["@graph"]
        exp_ids = [e["accession"] for e in experiments]
    elif args.experiments:
        exp_ids = args.experiments
    else:
        exp_ids = args.infile

    # For each experiment, pull its file metadata from the ENCODE portal
    for (i, exp_id) in enumerate(exp_ids):
        exp_id = exp_id.strip()
        logger.info("%s" % (exp_id))
        url = urlparse.urljoin(server, "/experiments/%s" % (exp_id))
        experiment_object = common.encoded_get(url, keypair)
        original_files = [
            common.encoded_get(urlparse.urljoin(server, "%s" % (uri)), keypair)
            for uri in experiment_object.get("original_files")
        ]
        bams = [
            f
            for f in original_files
            if f.get("file_format") == "bam" and f.get("status") not in ["revoked", "deleted", "replaced"]
        ]
        fastqs = [
            f
            for f in original_files
            if f.get("file_format") == "fastq" and f.get("status") not in ["revoked", "deleted", "replaced"]
        ]
        beds = [
            f
            for f in original_files
            if f.get("file_format") == "bed" and f.get("status") not in ["revoked", "deleted", "replaced"]
        ]
        bigBeds = [
            f
            for f in original_files
            if f.get("file_format") == "bigBed" and f.get("status") not in ["revoked", "deleted", "replaced"]
        ]
        # Recover each bed/bigBed's DNAnexus file ID and the job output name that produced it
        for f in beds + bigBeds:
            notes = json.loads(f.get("notes"))
            f["job"] = dxpy.describe(notes["dx-createdBy"]["job"])
            job = dxpy.describe(notes["dx-createdBy"]["job"])
            output_names = [
                output_name
                for output_name, value in job["output"].iteritems()
                if dxpy.is_dxlink(value) and value["$dnanexus_link"] == notes["dx-id"]
            ]
            assert len(output_names) == 1
            f["output_name"] = output_names[0]
            f["dxid"] = notes["dx-id"]
        # Point each bigBed's derived_from at the bed written under the matching output name
        for bb in bigBeds:
            print bb["accession"]
            notes = json.loads(bb.get("notes"))
            job = dxpy.describe(notes["dx-createdBy"]["job"])
            output_name = bb["output_name"]
            assert output_name.endswith("_bb")
            print output_name
            bed_output_name = output_name.rpartition("_bb")[0]
            print bed_output_name
            bed_dxid = job["output"][bed_output_name]["$dnanexus_link"]
            print bed_dxid
            possible_beds = [
                bed["accession"] for bed in beds if bed.get("notes") and json.loads(bed["notes"])["dx-id"] == bed_dxid
            ]
            print possible_beds
            assert len(possible_beds) == 1
            print possible_beds[0]
            if not args.dryrun:
                url = urlparse.urljoin(server, "/files/%s/" % (bb["accession"]))
                payload = {"derived_from": [possible_beds[0]]}
                print url
                print payload
                r = requests.patch(
                    url,
                    auth=keypair,
                    data=json.dumps(payload),
                    headers={"content-type": "application/json", "accept": "application/json"},
                )
                try:
                    r.raise_for_status()
                except:
                    print r.text
        # The overlapping_peaks bed derives from the rep1/rep2/pooled peak inputs of its job
        overlapping_peaks_beds = [b for b in beds if b.get("output_name") == "overlapping_peaks"]
        assert len(overlapping_peaks_beds) == 1
        overlapping_peaks_bed = overlapping_peaks_beds[0]
        job = overlapping_peaks_bed["job"]
        derived_from_dxids = [
            job["input"][input_name]["$dnanexus_link"]
            for input_name in job["input"].keys()
            if input_name in ["rep1_peaks", "rep2_peaks", "pooled_peaks"]
        ]
        print derived_from_dxids
        derived_from_accessions = [bed["accession"] for bed in beds if bed["dxid"] in derived_from_dxids]
        print derived_from_accessions
        if not args.dryrun:
            url = urlparse.urljoin(server, "/files/%s/" % (overlapping_peaks_bed["accession"]))
            payload = {"derived_from": derived_from_accessions}
            print url
            print payload
            r = requests.patch(
                url,
                auth=keypair,
                data=json.dumps(payload),
                headers={"content-type": "application/json", "accept": "application/json"},
            )
            try:
                r.raise_for_status()
            except:
                print r.text
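
The file-to-output-name matching this script performs against live job descriptions can be sketched in isolation with a made-up job output section and placeholder IDs:

 import dxpy

 # Hypothetical job "output" section; only real dxlinks should match
 job_output = {
     "overlapping_peaks": {"$dnanexus_link": "file-xxxx"},
     "overlapping_peaks_bb": {"$dnanexus_link": "file-yyyy"},
     "qc_summary": "not a link",
 }
 target_dxid = "file-yyyy"

 output_names = [name for name, value in job_output.items()
                 if dxpy.is_dxlink(value) and value["$dnanexus_link"] == target_dxid]
 assert output_names == ["overlapping_peaks_bb"]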