def format_data_object_reference(item):
    if dxpy.is_dxlink(item):
        # Bare dxlink
        obj_id, proj_id = dxpy.get_dxlink_ids(item)
        return (proj_id + ":" if proj_id else '') + obj_id
    if dxpy.is_dxlink(item.get('value')):
        # value is set
        obj_id, proj_id = dxpy.get_dxlink_ids(item['value'])
        return (proj_id + ":" if proj_id else '') + obj_id + \
               (' (%s)' % item['name'] if item.get('name') else '')
    if item.get('project') and item.get('path'):
        # project and folder path
        # NOTE: obj_class is not defined in this snippet; it is expected to be bound in the
        # enclosing scope (the class of the referenced object, e.g. 'file').
        return item['project'] + ':' + item['path'] + "/" + obj_class + "-*" + \
               (' (%s)' % item['name'] if item.get('name') else '')
    return str(item)
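# Hypothetical usage sketch (not part of the original source) showing the first two input
# shapes the function accepts; "file-xxxx" and "project-yyyy" are placeholder IDs. The third
# shape ({'project': ..., 'path': ...}) additionally requires obj_class to be bound in the
# enclosing scope.
example_link = dxpy.dxlink("file-xxxx", "project-yyyy")
print(format_data_object_reference(example_link))
# -> "project-yyyy:file-xxxx"
print(format_data_object_reference({"value": example_link, "name": "reads.fastq.gz"}))
# -> "project-yyyy:file-xxxx (reads.fastq.gz)"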
def unpack_tar(self, tar_file_dxlink):
    '''
    DEV: Eventually integrate dx-toolkit into trajectoread repo so I can transition to
         using 'dx-download-all-inputs' to handle unpacking all input files. Pipeline
         used to store lane file dxids as project properties and then pass to "dx download".

    Description: Download and untar metadata and lane data files
                 (/Data/Intensities/BaseCalls)
    '''
    if dxpy.is_dxlink(tar_file_dxlink):
        file_handler = dxpy.get_handler(tar_file_dxlink)
        filename = file_handler.name
    else:
        print 'Error: Cannot unpack %s; not a valid DXLink object' % tar_file_dxlink
        sys.exit()

    # dxpy.get_dxlink_ids() returns the ('file-dxid', 'project-dxid') pair
    file_dxid, project_id = dxpy.get_dxlink_ids(tar_file_dxlink)

    # Download file from DNAnexus object store to the virtual machine
    dxpy.download_dxfile(dxid=file_dxid, filename=filename, project=project_id)

    # Untar file
    ## DEV: Check if this is even in use anymore; also should have some method for
    ## checking what type of compression was used.
    ## But I don't think this is in use
    command = 'tar -xf %s --owner root --group root --no-same-owner' % filename
    self.createSubprocess(cmd=command, pipeStdout=False)
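# Hedged usage sketch (not from the original pipeline): unpack_tar expects a DXLink dict like
# the one below, from which dxpy.get_dxlink_ids() recovers the (file_id, project_id) pair.
# "file-xxxx"/"project-yyyy" are placeholder IDs and `runner` is a hypothetical instance of the
# enclosing pipeline class.
lane_tar_link = dxpy.dxlink("file-xxxx", "project-yyyy")
# lane_tar_link == {"$dnanexus_link": {"project": "project-yyyy", "id": "file-xxxx"}}
runner.unpack_tar(lane_tar_link)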
def add_file(iname, subdir, value):
    if not dxpy.is_dxlink(value):
        return
    handler = dxpy.get_handler(value)
    if not isinstance(handler, dxpy.DXFile):
        return
    filename = make_unix_filename(handler.name)
    trg_dir = iname
    if subdir is not None:
        trg_dir = os.path.join(trg_dir, subdir)
    files[iname].append({'trg_fname': os.path.join(trg_dir, filename),
                         'handler': handler,
                         'src_file_id': handler.id})
    dirs.append(trg_dir)
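# Minimal sketch of the context add_file assumes from its enclosing scope (assumptions, not the
# original module): a per-input accumulator of files to download, a list of directories to
# create, and a make_unix_filename helper that maps an object name to a safe local filename.
import os
from collections import defaultdict

import dxpy

files = defaultdict(list)   # input name -> [{'trg_fname', 'handler', 'src_file_id'}, ...]
dirs = []                   # target directories to create before downloading

def make_unix_filename(name):
    # Placeholder: the real helper presumably also rejects names such as '.' and '..'
    return name.replace('/', '_')

# Intended call shape (commented out because the placeholder ID would trigger a live describe
# when handler.name is read):
# add_file('reads', None, dxpy.dxlink('file-xxxx'))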
def _resolve_output(self, value):
    if dxpy.is_dxlink(value):
        dxfile = dxpy.DXFile(value)
        file_id = dxfile.get_id()
        if file_id not in DxWdlExecutor._data_cache:
            # Store each file in a subdirectory named by its ID to avoid
            # naming collisions
            cache_dir = self._dxwdl_cache_dir / file_id
            cache_dir.mkdir(parents=True)
            filename = cache_dir / dxfile.describe()["name"]
            dxpy.download_dxfile(dxfile, filename)
            DxWdlExecutor._data_cache[file_id] = filename
        return DxWdlExecutor._data_cache[file_id]
    elif isinstance(value, dict):
        return {
            key: self._resolve_output(val)
            for key, val in cast(dict, value).items()
        }
    elif isinstance(value, Sequence) and not isinstance(value, str):
        return [self._resolve_output(val) for val in cast(Sequence, value)]
    else:
        return value
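# Assumed imports and class state behind the snippet above (a sketch; everything outside dxpy is
# inferred from the method body, not taken from the original module):
from collections.abc import Sequence   # may equally be typing.Sequence in the original
from pathlib import Path
from typing import cast

import dxpy

# class DxWdlExecutor:
#     _data_cache = {}                        # file ID -> local Path, shared across instances
#
#     def __init__(self, cache_dir):
#         self._dxwdl_cache_dir = Path(cache_dir)   # the '/' operator implies a pathlib.Path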
def main():
    args = get_args()
    if args.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    authid, authpw, server = common.processkey(args.key, args.keyfile)
    keypair = (authid, authpw)

    if args.query:
        r = requests.get(
            args.query,
            auth=keypair,
            headers={"content-type": "application/json", "accept": "application/json"})
        experiments = r.json()["@graph"]
        exp_ids = [e["accession"] for e in experiments]
    elif args.experiments:
        exp_ids = args.experiments
    else:
        exp_ids = args.infile

    for (i, exp_id) in enumerate(exp_ids):
        exp_id = exp_id.strip()
        logger.info("%s" % (exp_id))

        url = urlparse.urljoin(server, "/experiments/%s" % (exp_id))
        experiment_object = common.encoded_get(url, keypair)
        original_files = [
            common.encoded_get(urlparse.urljoin(server, "%s" % (uri)), keypair)
            for uri in experiment_object.get("original_files")]

        # Partition the experiment's files by format (bams and fastqs are collected here
        # but not used below)
        bams = [f for f in original_files
                if f.get("file_format") == "bam"
                and f.get("status") not in ["revoked", "deleted", "replaced"]]
        fastqs = [f for f in original_files
                  if f.get("file_format") == "fastq"
                  and f.get("status") not in ["revoked", "deleted", "replaced"]]
        beds = [f for f in original_files
                if f.get("file_format") == "bed"
                and f.get("status") not in ["revoked", "deleted", "replaced"]]
        bigBeds = [f for f in original_files
                   if f.get("file_format") == "bigBed"
                   and f.get("status") not in ["revoked", "deleted", "replaced"]]

        # Map each bed/bigBed back to the DNAnexus job and output name that produced it
        for f in beds + bigBeds:
            notes = json.loads(f.get("notes"))
            f["job"] = dxpy.describe(notes["dx-createdBy"]["job"])
            job = dxpy.describe(notes["dx-createdBy"]["job"])
            output_names = [
                output_name for output_name, value in job["output"].iteritems()
                if dxpy.is_dxlink(value) and value["$dnanexus_link"] == notes["dx-id"]]
            assert len(output_names) == 1
            f["output_name"] = output_names[0]
            f["dxid"] = notes["dx-id"]

        # Each bigBed derives from the bed produced by the same job's matching (non-_bb) output
        for bb in bigBeds:
            print bb["accession"]
            notes = json.loads(bb.get("notes"))
            job = dxpy.describe(notes["dx-createdBy"]["job"])
            output_name = bb["output_name"]
            assert output_name.endswith("_bb")
            print output_name
            bed_output_name = output_name.rpartition("_bb")[0]
            print bed_output_name
            bed_dxid = job["output"][bed_output_name]["$dnanexus_link"]
            print bed_dxid
            possible_beds = [
                bed["accession"] for bed in beds
                if bed.get("notes") and json.loads(bed["notes"])["dx-id"] == bed_dxid]
            print possible_beds
            assert len(possible_beds) == 1
            print possible_beds[0]
            if not args.dryrun:
                url = urlparse.urljoin(server, "/files/%s/" % (bb["accession"]))
                payload = {"derived_from": [possible_beds[0]]}
                print url
                print payload
                r = requests.patch(
                    url,
                    auth=keypair,
                    data=json.dumps(payload),
                    headers={"content-type": "application/json", "accept": "application/json"})
                try:
                    r.raise_for_status()
                except:
                    print r.text

        # The overlapping_peaks bed derives from the rep and pooled peak inputs of its job
        overlapping_peaks_beds = [b for b in beds if b.get("output_name") == "overlapping_peaks"]
        assert len(overlapping_peaks_beds) == 1
        overlapping_peaks_bed = overlapping_peaks_beds[0]
        job = overlapping_peaks_bed["job"]
        derived_from_dxids = [
            job["input"][input_name]["$dnanexus_link"]
            for input_name in job["input"].keys()
            if input_name in ["rep1_peaks", "rep2_peaks", "pooled_peaks"]]
        print derived_from_dxids
        derived_from_accessions = [
            bed["accession"] for bed in beds if bed["dxid"] in derived_from_dxids]
        print derived_from_accessions
        if not args.dryrun:
            url = urlparse.urljoin(server, "/files/%s/" % (overlapping_peaks_bed["accession"]))
            payload = {"derived_from": derived_from_accessions}
            print url
            print payload
            r = requests.patch(
                url,
                auth=keypair,
                data=json.dumps(payload),
                headers={"content-type": "application/json", "accept": "application/json"})
            try:
                r.raise_for_status()
            except:
                print r.text