def register(upload_id, base_path, file, meta):
    """Register an uploaded BED file as a Content record via the REST API."""
    # Absolute on-disk location plus the publisher-relative path of the file.
    absolute_path = os.path.join(base_path, file)
    relative_path = "/%s" % file
    publisher_uri = "/rundb/api/v1/publisher/BED/"
    upload_uri = "/rundb/api/v1/contentupload/%d/" % upload_id
    api.post(
        "content",
        publisher=publisher_uri,
        meta=json.dumps(meta),
        file=absolute_path,
        path=relative_path,
        contentupload=upload_uri,
    )
def validate(upload_id, base_path, meta, bed_file, bed_type):
    """Validate a BED file with tvcutils, generating merged/unmerged variants.

    Returns the path of an already-registered identical file when this is an
    AmpliSeq upload; otherwise returns None (the process exits on a duplicate
    non-AmpliSeq file or on a validation failure).
    """
    print("Validating %s file: %s" % (bed_type, bed_file))

    # Refuse duplicate uploads: a content record with the same path already
    # exists for this reference.
    path_end = '/' + meta["reference"] + "/unmerged/detail/" + bed_file
    data, response, raw = api.get("content", publisher_name='BED',
                                  format='json', path__endswith=path_end)
    if int(data['meta']['total_count']) > 0:
        if meta['is_ampliseq']:
            return data['objects'][0]['file']
        api.post('log',
                 upload='/rundb/api/v1/contentupload/%s/' % str(upload_id),
                 text='Error: The file %s already exists. Please rename your file.' % bed_file)
        sys.exit(1)

    # Output directories for the four generated BED flavours.
    result_UD_dir = os.path.join(base_path, meta['reference'], 'unmerged', 'detail')
    result_UP_dir = os.path.join(base_path, meta['reference'], 'unmerged', 'plain')
    result_MD_dir = os.path.join(base_path, meta['reference'], 'merged', 'detail')
    result_MP_dir = os.path.join(base_path, meta['reference'], 'merged', 'plain')
    for result_dir in (result_UD_dir, result_UP_dir, result_MD_dir, result_MP_dir):
        if not os.path.exists(result_dir):
            os.makedirs(result_dir)

    output_log = os.path.join(base_path, bed_file + '.log')
    output_json = os.path.join(base_path, bed_file + '.json')

    cmd = '/usr/local/bin/tvcutils validate_bed'
    cmd += ' --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
        meta['reference'], meta['reference'])
    if bed_type == 'target regions BED':
        cmd += ' --target-regions-bed "%s"' % os.path.join(base_path, bed_file)
    elif bed_type == 'hotspots BED':
        cmd += ' --hotspots-bed "%s"' % os.path.join(base_path, bed_file)
    cmd += ' --unmerged-detail-bed "%s"' % os.path.join(result_UD_dir, bed_file)
    cmd += ' --unmerged-plain-bed "%s"' % os.path.join(result_UP_dir, bed_file)
    cmd += ' --merged-detail-bed "%s"' % os.path.join(result_MD_dir, bed_file)
    cmd += ' --merged-plain-bed "%s"' % os.path.join(result_MP_dir, bed_file)
    cmd += ' --validation-log "%s"' % output_log
    cmd += ' --meta-json "%s"' % output_json
    print(cmd)

    # BUGFIX: the original Popen had no stdout/stderr pipes, so communicate()
    # returned (None, None) and "None" was printed instead of the tool output.
    p = subprocess.Popen(cmd, shell=True,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    sys.stderr.write("=== -.- ===\n")
    print(stdout)
    print(stderr)

    # Forward the validation log, line by line, to the upload's log feed.
    if os.path.exists(output_log):
        for line in open(output_log):
            api.post('log',
                     upload='/rundb/api/v1/contentupload/%s/' % str(upload_id),
                     text=line.strip())
    # Merge tool-produced metadata (e.g. region statistics) into meta.
    if os.path.exists(output_json):
        with open(output_json) as json_file:
            meta.update(json.load(json_file))
    if p.returncode != 0:
        sys.exit(p.returncode)
    return None
def register(upload_id, base_path, file, meta, bed_type):
    """Create a Content record for a validated BED file, tagged with its type."""
    absolute_path = os.path.join(base_path, file)
    relative_path = "/%s" % file
    publisher_uri = "/rundb/api/v1/publisher/BED/"
    upload_uri = "/rundb/api/v1/contentupload/%d/" % upload_id
    api.post(
        "content",
        publisher=publisher_uri,
        meta=json.dumps(meta),
        file=absolute_path,
        path=relative_path,
        contentupload=upload_uri,
        type=bed_type,
        extra=meta['reference'],
        description=meta.get('description', ''),
        notes=meta.get('notes', ''),
    )
def register(file, meta):
    """Register *file* (relative to the module-level `directory`) via the API."""
    # `directory`, `pub_uid` and `upload_uid` are module-level globals here.
    absolute_path = os.path.join(directory, file)
    relative_path = "/%s" % file
    api.post("content", publisher=pub_uid, meta=meta, file=absolute_path,
             path=relative_path, contentupload=upload_uid)
def pre_process():
    """Pre-process an upload: unpack it, classify its content, persist meta.

    Handles three upload shapes: a single BED file, a single hotspot VCF
    (converted to BED via tvcutils), or an AmpliSeq archive with plan.json.
    The updated metadata is rewritten to the meta file and patched onto the
    contentupload record.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file', type=argparse.FileType('r+'))
    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    meta = json.load(args.meta_file, parse_float=Decimal)
    meta.update({
        "is_ampliseq": None,
        "primary_bed": None,
        "hotspot_bed": None
    })

    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = unzip_archive(args.path, args.upload_file)
    else:
        files = [args.upload_file]

    if len(files) == 1 and files[0].endswith('.bed'):
        meta['is_ampliseq'] = False
        meta['primary_bed'] = files[0]
    elif len(files) == 1 and files[0].endswith('.vcf') and meta['hotspot']:
        # Convert hotspot VCF to BED with tvcutils, logging the tool output.
        target_filename = os.path.join(args.path, os.path.basename(files[0])) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += ' --input-vcf %s' % files[0]
        convert_command += ' --output-bed %s' % target_filename
        convert_command += ' --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])  # TODO: is this just name or full path??
        convert_command += ' --filter-bypass on'
        process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT,
                                   stdout=subprocess.PIPE, shell=True)
        for line in process.communicate()[0].splitlines():
            api.post('log',
                     upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text=line.strip())
        meta['is_ampliseq'] = False
        meta['primary_bed'] = target_filename
    # NOTE: a dead commented-out duplicate of the VCF branch (carried as a
    # bare triple-quoted string literal) was removed here.
    elif "plan.json" in files:
        print("Found ampliseq")
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")),
                              parse_float=Decimal)
        version, design = ampliseq.handle_versioned_plans(plan_data)
        meta['design'] = design
        plan = design['plan']
        try:
            meta['primary_bed'] = plan['designed_bed']
            meta['secondary_bed'] = plan['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
        except KeyError as err:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            raise
        print(meta)
    else:
        raise ValueError(
            "Upload must be either valid Ampliseq export or contain a single BED file.")

    # Rewrite the (r+) meta file in place with the updated metadata.
    args.meta_file.truncate(0)
    args.meta_file.seek(0)
    json.dump(meta, args.meta_file, cls=JSONEncoder)
    api.patch("contentupload", args.upload_id, meta=meta)
def post(self, endpoint, data=None):
    """POST *data* to *endpoint* under the API base URL using this key."""
    url = VentataAPI.BASE_URL + endpoint
    return post(url, self.api_key, data)
def main():
    """Process a BED/VCF/AmpliSeq content upload end to end.

    Classifies the (already pre-processed) upload, validates and registers
    its BED files, and - for AmpliSeq archives - creates a planned
    experiment through the REST API.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')
    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    with open(args.meta_file) as f:
        meta = json.load(f, parse_float=Decimal)

    # ROBUSTNESS: default to [] so a missing key falls through to the
    # "Unrecognized upload type" branch instead of crashing on len(None).
    files = meta.get('pre_process_files') or []

    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False

    if len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print("Content: Target regions file in BED format")
        print("")
    elif len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print("Content: Hotspots file in BED format")
        print("")
    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot', False) == True:
        print("Content: Hotspots file in VCF format")
        print("")
        print("Converting hotspot VCF file to BED: %s" % files[0])
        print("")
        hotspots_bed = os.path.basename(files[0]) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += ' --input-vcf %s' % os.path.join(args.path, os.path.basename(files[0]))
        convert_command += ' --output-bed %s' % os.path.join(args.path, hotspots_bed)
        convert_command += ' --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        convert_command += ' --filter-bypass on'
        p = subprocess.Popen(convert_command, stderr=subprocess.STDOUT,
                             stdout=subprocess.PIPE, shell=True)
        print(p.communicate()[0])
        if p.returncode != 0:
            sys.exit(p.returncode)
        meta['is_ampliseq'] = False
    elif "plan.json" in files:
        print("Content: AmpliSeq ZIP")
        print("")
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")))
        version, design, meta = ampliseq.handle_versioned_plans(plan_data, meta, args.path)
        meta['design'] = design
        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome']
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            api.update_meta(meta, args)
        except KeyError as err:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print("ERROR: Malformed AmpliSeq archive: missing json key " + str(err))
            sys.exit(1)
        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print("ERROR: Target region file %s not present in AmpliSeq archive"
                  % target_regions_bed)
            sys.exit(1)
        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            # BUGFIX: previously reported target_regions_bed instead of the
            # hotspots file that is actually missing.
            print("ERROR: Hotspots file %s not present in AmpliSeq archive"
                  % hotspots_bed)
            sys.exit(1)
    else:
        api.patch("contentupload", args.upload_id,
                  status="Error: Unrecognized upload type.")
        print("")
        print("ERROR: Unrecognized upload type. Upload must be either a valid Ampliseq ZIP or contain a single BED or VCF file.")
        sys.exit(1)

    # === Validate and Register ===
    primary_path = None
    secondary_path = None
    if is_BED_encrypted(meta):
        # Encrypted panels ship no plain BED files; blank out the plan paths.
        if target_regions_bed:
            meta['design']['plan']['designed_bed'] = ''
        if hotspots_bed:
            meta['design']['plan']['hotspot_bed'] = ''
        primary_path = ""
        secondary_path = ""
    else:
        if target_regions_bed:
            primary_path = validate(args.upload_id, args.path, meta,
                                    target_regions_bed, 'target regions BED')
        if hotspots_bed:
            secondary_path = validate(args.upload_id, args.path, meta,
                                      hotspots_bed, 'hotspots BED')
        meta["hotspot"] = False
        if target_regions_bed and not primary_path:
            register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
        if hotspots_bed:
            meta["hotspot"] = True
            if not secondary_path:
                register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        try:
            if not is_BED_encrypted(meta):
                if target_regions_bed and not primary_path:
                    primary_path = os.path.join(
                        args.path,
                        meta["reference"] + "/unmerged/detail/" + target_regions_bed)
                if hotspots_bed and not secondary_path:
                    secondary_path = os.path.join(
                        args.path,
                        meta["reference"] + "/unmerged/detail/" + hotspots_bed)
            else:
                run_type = meta['design']['plan'].get('runType', None)
                if run_type and (run_type == "AMPS_RNA"):
                    meta['reference'] = None
            plan_prototype, alignmentargs_override = plan_json(
                meta, args.upload_id, primary_path, secondary_path)
            success, response, content = api.post("plannedexperiment", **plan_prototype)
            if not success:
                api.patch("contentupload", args.upload_id,
                          status="Error: unable to create TS Plan")
                err_content = json.loads(content)
                error_message_array = []
                if 'error' in err_content:
                    error_json = json.loads(str(err_content['error'][3:-2]))
                    for k in error_json:
                        for j in range(len(error_json[k])):
                            # BUGFIX: unescape '&gt;'; the original replace was
                            # a no-op (replacing '>' with '>').
                            err_message = str(error_json[k][j]).replace('&gt;', '>')
                            error_message_array.append(err_message)
                # BUGFIX: join outside the 'error' branch so a response without
                # an 'error' key raises cleanly instead of a NameError.
                error_messages = ','.join(error_message_array)
                raise Exception(error_messages)
            if alignmentargs_override:
                content_dict = json.loads(content)
                api.patch("plannedexperiment", content_dict["id"],
                          alignmentargs=alignmentargs_override,
                          thumbnailalignmentargs=alignmentargs_override)
        except Exception as err:
            print("ERROR: Could not create plan from this zip: %s." % err)
            raise
    api.update_meta(meta, args)
def main():
    """Process an uploaded BED/VCF/AmpliSeq archive.

    Unpacks the upload, classifies its content, validates and registers the
    BED files, and creates a planned experiment for AmpliSeq archives.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')
    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    with open(args.meta_file, 'r') as meta_file_handle:
        meta = json.load(meta_file_handle, parse_float=Decimal)

    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = file_utils.unzip_archive(args.path, args.upload_file)
    elif args.upload_file.endswith('.gz'):
        # Decompress a gzipped single file into the working path.
        files = [os.path.basename(args.upload_file[:-3])]
        cmd = 'gzip -dc %s > %s ' % (args.upload_file, os.path.join(args.path, files[0]))
        subprocess.call(cmd, shell=True)
    else:
        files = [args.upload_file]

    # --- Establish the upload type ---
    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False
    if len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
    elif len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot', False) == True:
        # Convert hotspot VCF to BED, forwarding tool output to the log feed.
        hotspots_bed = os.path.basename(files[0]) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += ' --input-vcf %s' % os.path.join(args.path, os.path.basename(files[0]))
        convert_command += ' --output-bed %s' % os.path.join(args.path, hotspots_bed)
        convert_command += ' --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])  # TODO: is this just name or full path??
        convert_command += ' --filter-bypass on'
        process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT,
                                   stdout=subprocess.PIPE, shell=True)
        for line in process.communicate()[0].splitlines():
            api.post('log',
                     upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text=line.strip())
        meta['is_ampliseq'] = False
    elif "plan.json" in files:
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")))
        version, design = ampliseq.handle_versioned_plans(plan_data, meta)
        meta['design'] = design
        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            with open(args.meta_file, 'w') as meta_file_handle:
                json.dump(meta, meta_file_handle, cls=JSONEncoder)
            api.patch("contentupload", args.upload_id, meta=meta)
        except KeyError as err:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            api.post('log',
                     upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text="Malformed AmpliSeq archive: missing json key " + str(err))
            raise
        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            api.post('log',
                     upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text="Malformed AmpliSeq archive: Target region file %s not present in AmpliSeq archive" % target_regions_bed)
            raise ValueError("Target region file %s not present in AmpliSeq archive" % target_regions_bed)
        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            # BUGFIX: both messages previously reported target_regions_bed
            # instead of the missing hotspots file.
            api.post('log',
                     upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text="Malformed AmpliSeq archive: Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)
            raise ValueError("Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)
    else:
        api.patch("contentupload", args.upload_id,
                  status="Error: Upload must be either valid Ampliseq export or contain a single BED or VCF file.")
        raise ValueError("Upload must be either valid Ampliseq export or contain a single BED or VCF file.")

    # === Validate and Register ===
    primary_path = None
    secondary_path = None
    if target_regions_bed:
        primary_path = validate(args.upload_id, args.path, meta,
                                target_regions_bed, 'target regions BED')
    if hotspots_bed:
        secondary_path = validate(args.upload_id, args.path, meta,
                                  hotspots_bed, 'hotspots BED')
    meta["hotspot"] = False
    if target_regions_bed and not primary_path:
        register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
    if hotspots_bed:
        meta["hotspot"] = True
        if not secondary_path:
            register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        try:
            if target_regions_bed and not primary_path:
                primary_path = os.path.join(
                    args.path, meta["reference"] + "/unmerged/detail/" + target_regions_bed)
            if hotspots_bed and not secondary_path:
                secondary_path = os.path.join(
                    args.path, meta["reference"] + "/unmerged/detail/" + hotspots_bed)
            plan_prototype = plan_json(meta, primary_path, secondary_path)
            api.post("plannedexperiment", **plan_prototype)
        except Exception as err:
            print("Could not create plan from this zip: %s" % err)

    with open(args.meta_file, 'w') as meta_file_handle:
        json.dump(meta, meta_file_handle, cls=JSONEncoder)
    api.patch("contentupload", args.upload_id, meta=meta)
def main():
    """Process a BED/VCF/AmpliSeq content upload end to end.

    Unpacks the upload (zip/gzip/plain), classifies it, validates and
    registers the BED files, and creates a planned experiment for AmpliSeq
    archives.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')
    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    with open(args.meta_file, 'r') as meta_file_handle:
        meta = json.load(meta_file_handle, parse_float=Decimal)

    print("Uploaded file: " + os.path.basename(args.upload_file))
    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = file_utils.unzip_archive(args.path, args.upload_file)
        print("Compressed: Yes (zip)")
    elif args.upload_file.endswith('.gz'):
        print("Compressed: Yes (gzip)")
        files = [os.path.basename(args.upload_file[:-3])]
        cmd = 'gzip -dc %s > %s ' % (args.upload_file, os.path.join(args.path, files[0]))
        p = subprocess.Popen(cmd, stderr=subprocess.STDOUT,
                             stdout=subprocess.PIPE, shell=True)
        print(p.communicate()[0])
        if p.returncode != 0:
            sys.exit(p.returncode)
        # BUGFIX: a redundant subprocess.call(cmd, shell=True) ran the same
        # decompression a second time; it has been removed.
    else:
        print("Compressed: No")
        files = [args.upload_file]

    # --- Establish the upload type ---
    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False
    if len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print("Content: Target regions file in BED format")
        print("")
    elif len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print("Content: Hotspots file in BED format")
        print("")
    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot', False) == True:
        print("Content: Hotspots file in VCF format")
        print("")
        print("Converting hotspot VCF file to BED: %s" % files[0])
        print("")
        hotspots_bed = os.path.basename(files[0]) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += ' --input-vcf %s' % os.path.join(args.path, os.path.basename(files[0]))
        convert_command += ' --output-bed %s' % os.path.join(args.path, hotspots_bed)
        convert_command += ' --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        convert_command += ' --filter-bypass on'
        p = subprocess.Popen(convert_command, stderr=subprocess.STDOUT,
                             stdout=subprocess.PIPE, shell=True)
        print(p.communicate()[0])
        if p.returncode != 0:
            sys.exit(p.returncode)
        meta['is_ampliseq'] = False
    elif "plan.json" in files:
        print("Content: AmpliSeq ZIP")
        print("")
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")))
        version, design, meta = ampliseq.handle_versioned_plans(plan_data, meta)
        meta['design'] = design
        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            with open(args.meta_file, 'w') as meta_file_handle:
                json.dump(meta, meta_file_handle, cls=JSONEncoder)
            api.patch("contentupload", args.upload_id, meta=meta)
        except KeyError as err:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print("ERROR: Malformed AmpliSeq archive: missing json key " + str(err))
            sys.exit(1)
        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print("ERROR: Target region file %s not present in AmpliSeq archive"
                  % target_regions_bed)
            sys.exit(1)
        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            # BUGFIX: previously reported target_regions_bed instead of the
            # hotspots file that is actually missing.
            print("ERROR: Hotspots file %s not present in AmpliSeq archive"
                  % hotspots_bed)
            sys.exit(1)
    else:
        api.patch("contentupload", args.upload_id,
                  status="Error: Unrecognized upload type.")
        print("")
        print("ERROR: Unrecognized upload type. Upload must be either a valid Ampliseq ZIP or contain a single BED or VCF file.")
        sys.exit(1)

    # === Validate and Register ===
    primary_path = None
    secondary_path = None
    if target_regions_bed:
        primary_path = validate(args.upload_id, args.path, meta,
                                target_regions_bed, 'target regions BED')
    if hotspots_bed:
        secondary_path = validate(args.upload_id, args.path, meta,
                                  hotspots_bed, 'hotspots BED')
    meta["hotspot"] = False
    if target_regions_bed and not primary_path:
        register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
    if hotspots_bed:
        meta["hotspot"] = True
        if not secondary_path:
            register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        try:
            if target_regions_bed and not primary_path:
                primary_path = os.path.join(
                    args.path, meta["reference"] + "/unmerged/detail/" + target_regions_bed)
            if hotspots_bed and not secondary_path:
                secondary_path = os.path.join(
                    args.path, meta["reference"] + "/unmerged/detail/" + hotspots_bed)
            plan_prototype = plan_json(meta, args.upload_id, primary_path, secondary_path)
            success, response, content = api.post("plannedexperiment", **plan_prototype)
            if not success:
                api.patch("contentupload", args.upload_id,
                          status="Error: unable to create TS Plan")
                raise Exception("Plan creation API request failed.")
        except Exception as err:
            print("ERROR: Could not create plan from this zip: %s" % err)
            raise

    with open(args.meta_file, 'w') as meta_file_handle:
        json.dump(meta, meta_file_handle, cls=JSONEncoder)
    api.patch("contentupload", args.upload_id, meta=meta)
def validate(upload_id, base_path, meta, bed_file, bed_type):
    """Validate a BED file with tvcutils, generating merged/unmerged variants.

    Returns the path of an already-registered identical file when this is an
    AmpliSeq upload; otherwise returns None (the process exits on a duplicate
    non-AmpliSeq file or on a validation failure).
    """
    print("Validating %s file: %s" % (bed_type, bed_file))

    # Refuse duplicate uploads: a content record with the same path already
    # exists for this reference.
    path_end = '/' + meta["reference"] + "/unmerged/detail/" + bed_file
    data, response, raw = api.get("content", publisher_name='BED',
                                  format='json', path__endswith=path_end)
    if int(data['meta']['total_count']) > 0:
        if meta['is_ampliseq']:
            return data['objects'][0]['file']
        api.post(
            'log',
            upload='/rundb/api/v1/contentupload/%s/' % str(upload_id),
            text='Error: The file %s already exists. Please rename your file.' % bed_file)
        sys.exit(1)

    # Output directories for the four generated BED flavours.
    result_UD_dir = os.path.join(base_path, meta['reference'], 'unmerged', 'detail')
    result_UP_dir = os.path.join(base_path, meta['reference'], 'unmerged', 'plain')
    result_MD_dir = os.path.join(base_path, meta['reference'], 'merged', 'detail')
    result_MP_dir = os.path.join(base_path, meta['reference'], 'merged', 'plain')
    for result_dir in (result_UD_dir, result_UP_dir, result_MD_dir, result_MP_dir):
        if not os.path.exists(result_dir):
            os.makedirs(result_dir)

    output_log = os.path.join(base_path, bed_file + '.log')
    output_json = os.path.join(base_path, bed_file + '.json')

    cmd = '/usr/local/bin/tvcutils validate_bed'
    cmd += ' --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
        meta['reference'], meta['reference'])
    if bed_type == 'target regions BED':
        cmd += ' --target-regions-bed "%s"' % os.path.join(base_path, bed_file)
    elif bed_type == 'hotspots BED':
        cmd += ' --hotspots-bed "%s"' % os.path.join(base_path, bed_file)
    cmd += ' --unmerged-detail-bed "%s"' % os.path.join(result_UD_dir, bed_file)
    cmd += ' --unmerged-plain-bed "%s"' % os.path.join(result_UP_dir, bed_file)
    cmd += ' --merged-detail-bed "%s"' % os.path.join(result_MD_dir, bed_file)
    cmd += ' --merged-plain-bed "%s"' % os.path.join(result_MP_dir, bed_file)
    cmd += ' --validation-log "%s"' % output_log
    cmd += ' --meta-json "%s"' % output_json
    print(cmd)

    # BUGFIX: the original Popen had no stdout/stderr pipes, so communicate()
    # returned (None, None) and "None" was printed instead of the tool output.
    p = subprocess.Popen(cmd, shell=True,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    sys.stderr.write("=== -.- ===\n")
    print(stdout)
    print(stderr)

    # Forward the validation log, line by line, to the upload's log feed.
    if os.path.exists(output_log):
        for line in open(output_log):
            api.post('log',
                     upload='/rundb/api/v1/contentupload/%s/' % str(upload_id),
                     text=line.strip())
    # Merge tool-produced metadata (e.g. region statistics) into meta.
    if os.path.exists(output_json):
        with open(output_json) as json_file:
            meta.update(json.load(json_file))
    if p.returncode != 0:
        sys.exit(p.returncode)
    return None
True, "username": "******", } return plan_stub if meta['is_ampliseq']: print("I can't believe it's not Ampliseq!") print("Primary: %s" % meta['primary_bed']) print("Secondary: %s" % meta['secondary_bed']) meta["hotspot"] = False register_bed_file(meta['primary_bed'], json.dumps(meta)) if meta['secondary_bed'] is not None: meta["hotspot"] = True register_bed_file(meta['secondary_bed'], json.dumps(meta)) try: plan_prototype = plan_json(meta) api.post("plannedexperiment", **plan_prototype) except Exception as err: print("Could not create plan from this zip: %s" % err) sys.exit() elif bedFile.endswith('.vcf'): # Get bed file name without directory path bed_file_name = bedFile.split("/").pop() register_bed_file(bed_file_name + '.bed', line) else: # Get bed file name without directory path bed_file_name = bedFile.split("/").pop() register_bed_file(bed_file_name, line)
"sampleDisplayedName": "", "samplePrepKitName": "", "seqKitBarcode": None, "sequencekitname": "IonPGM200Kit", "storageHost": None, "storage_options": "A", "templatingKitName": "Ion OneTouch 200 Template Kit v2 DL", "usePostBeadfind": True, "usePreBeadfind": True, "username": "******", "variantfrequency": "Germ Line" } return plan_stub if meta['is_ampliseq']: print("I can't believe it's not Ampliseq!") print("Primary: %s" % meta['primary_bed']) print("Secondary: %s" % meta['secondary_bed']) meta["hotspot"] = False register_bed_file(meta['primary_bed'], json.dumps(meta)) if meta['secondary_bed'] is not None: meta["hotspot"] = True register_bed_file(meta['secondary_bed'], json.dumps(meta)) plan_prototype = plan_json(meta) api.post("plannedexperiment", **plan_prototype) sys.exit() else: # Get bed file name without directory path bed_file_name = bedFile.split("/").pop() register_bed_file(bed_file_name, line)
def pre_process():
    """Pre-process an upload: unpack it, classify its content, persist meta.

    Handles three upload shapes: a single BED file, a single hotspot VCF
    (converted to BED via tvcutils), or an AmpliSeq archive with plan.json.
    The updated metadata is rewritten to the meta file and patched onto the
    contentupload record.
    """
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file', type=argparse.FileType('r+'))
    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    meta = json.load(args.meta_file, parse_float=Decimal)
    meta.update({
        "is_ampliseq": None,
        "primary_bed": None,
        "hotspot_bed": None
    })

    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = unzip_archive(args.path, args.upload_file)
    else:
        files = [args.upload_file]

    if len(files) == 1 and files[0].endswith('.bed'):
        meta['is_ampliseq'] = False
        meta['primary_bed'] = files[0]
    elif len(files) == 1 and files[0].endswith('.vcf') and meta['hotspot']:
        # Convert hotspot VCF to BED with tvcutils, logging the tool output.
        target_filename = os.path.join(args.path, os.path.basename(files[0])) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += ' --input-vcf %s' % files[0]
        convert_command += ' --output-bed %s' % target_filename
        convert_command += ' --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"]
        )  # TODO: is this just name or full path??
        convert_command += ' --filter-bypass on'
        process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT,
                                   stdout=subprocess.PIPE, shell=True)
        for line in process.communicate()[0].splitlines():
            api.post('log',
                     upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text=line.strip())
        meta['is_ampliseq'] = False
        meta['primary_bed'] = target_filename
    # NOTE: a dead commented-out duplicate of the VCF branch (carried as a
    # bare triple-quoted string literal) was removed here.
    elif "plan.json" in files:
        print("Found ampliseq")
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")),
                              parse_float=Decimal)
        version, design = ampliseq.handle_versioned_plans(plan_data)
        meta['design'] = design
        plan = design['plan']
        try:
            meta['primary_bed'] = plan['designed_bed']
            meta['secondary_bed'] = plan['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
        except KeyError as err:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            raise
        print(meta)
    else:
        raise ValueError(
            "Upload must be either valid Ampliseq export or contain a single BED file."
        )

    # Rewrite the (r+) meta file in place with the updated metadata.
    args.meta_file.truncate(0)
    args.meta_file.seek(0)
    json.dump(meta, args.meta_file, cls=JSONEncoder)
    api.patch("contentupload", args.upload_id, meta=meta)