def check_reference(meta, args):
    """Check and install the needed reference genome."""
    print("Checking reference")
    plan_data = json.load(open(os.path.join(args.path, "plan.json")))
    version, design, meta = ampliseq.handle_versioned_plans(plan_data, meta)
    print("Got versioned plan data")
    # If we have a genome reference, check to see if it's installed
    reference = design.get('genome_reference', None)
    print(reference)
    if not reference:
        return False
    try:
        url = reference.get('uri')
        ref_hash = reference.get('files_md5sum', {}).get('fasta')
        short_name = reference.get('short_name')
        name = reference.get('name')
        notes = reference.get('notes', "AmpliSeq Import")
        print("Got reference fields")
    except KeyError as err:
        # The key may legitimately be absent, but it must not exist and be corrupt
        print("Corrupt genome_reference entry: {0}".format(err))
        sys.exit(1)
    # The identity_hash matching the files_md5sum.fasta hash determines
    # whether or not the genome is installed
    print("Checking reference " + ref_hash)
    if not models.ReferenceGenome.objects.filter(identity_hash=ref_hash).exists():
        reference = models.ReferenceGenome(
            enabled=False,
            identity_hash=ref_hash,
            name=name,
            notes=notes,
            short_name=short_name,
            source=url,
            status="downloading",
            index_version="tmap-f3",
        )
        reference.save()
        print("Created new reference record")
        pub = models.Publisher.objects.get(name='BED')
        upload = models.ContentUpload.objects.get(pk=args.upload_id)
        # This is a celery subtask that will run the publisher scripts on this upload again
        finish_me = run_pub_scripts.si(pub, upload)
        print("About to set checkpoint")
        set_checkpoint(meta, args)
        print("Checkpoint set")
        # With a status starting with "Waiting" the framework will stop
        # after pre_processing, before validate.
        upload.status = "Waiting on reference"
        upload.save()
        # The subtask finish_me will be called at the end of the reference
        # install process to restart validation of the upload.
        start_reference_download(url, reference, callback=finish_me)
        print("Started reference download")
        return True
    print("Reference already installed")
    return False
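# For reference, check_reference() above reads these keys from the
# "genome_reference" entry of plan.json. A minimal sketch of that shape --
# the key names come straight from the code; the values are hypothetical.
EXAMPLE_GENOME_REFERENCE = {
    "uri": "http://example.com/references/hg19.zip",  # download source (hypothetical URL)
    "files_md5sum": {
        # Hash compared against ReferenceGenome.identity_hash to decide
        # whether the genome is already installed.
        "fasta": "0123456789abcdef0123456789abcdef",
    },
    "short_name": "hg19",
    "name": "Homo sapiens (hg19)",
    "notes": "AmpliSeq Import",  # default supplied by the code when absent
}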
def pre_process():
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file', type=argparse.FileType('r+'))
    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)
    meta = json.load(args.meta_file)
    meta.update({
        "is_ampliseq": None,
        "primary_bed": None,
        "hotspot_bed": None,
    })
    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = unzip_archive(args.path, args.upload_file)
    else:
        files = [args.upload_file]
    if len(files) == 1 and files[0].endswith('.bed'):
        meta['is_ampliseq'] = False
        meta['primary_bed'] = files[0]
    elif "plan.json" in files:
        print("Found AmpliSeq archive")
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")))
        version, design = ampliseq.handle_versioned_plans(plan_data)
        meta['design'] = design
        plan = design['plan']
        try:
            meta['primary_bed'] = plan['designed_bed']
            meta['secondary_bed'] = plan['hotspot_bed']
            if 'reference' not in meta:
                meta['reference'] = plan['genome'].lower()
        except KeyError as err:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            raise
        print(meta)
    else:
        raise ValueError("Upload must be either a valid AmpliSeq export or contain a single BED file.")
    # Rewrite the metadata file in place and mirror it to the API.
    args.meta_file.truncate(0)
    args.meta_file.seek(0)
    json.dump(meta, args.meta_file)
    api.patch("contentupload", args.upload_id, meta=meta)
def get_ampliseq_fixed_designs(user, password):
    h = httplib2.Http(disable_ssl_certificate_validation=settings.DEBUG)
    h.add_credentials(user, password)
    url = urlparse.urljoin(settings.AMPLISEQ_URL, "ws/tmpldesign/list/active")
    response, content = h.request(url)
    if response['status'] == '200':
        designs = json.loads(content)
        fixed = []
        for template in designs.get('TemplateDesigns', []):
            version, data, meta = ampliseq.handle_versioned_plans(template)
            fixed.append(data)
        return response, fixed
    else:
        return response, None
def get_ampliseq_designs(user, password):
    h = httplib2.Http(disable_ssl_certificate_validation=settings.DEBUG)
    h.add_credentials(user, password)
    url = urlparse.urljoin(settings.AMPLISEQ_URL, "ws/design/list")
    response, content = h.request(url)
    if response['status'] == '200':
        design_data = json.loads(content)
        designs = design_data.get('AssayDesigns', [])
        for design in designs:
            solutions = []
            for solution in design.get('DesignSolutions', []):
                version, data, meta = ampliseq.handle_versioned_plans(solution)
                solutions.append(data)
            design['DesignSolutions'] = solutions
        return response, designs
    else:
        return response, {}
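# A minimal usage sketch for the two AmpliSeq web-service fetchers above.
# Both return (response, payload); the status check mirrors the one inside
# the functions. The helper name and the printed field are assumptions
# made for illustration only.
def _example_list_designs(user, password):
    response, fixed = get_ampliseq_fixed_designs(user, password)
    if response['status'] == '200':
        for template in fixed:
            print(template.get('design_name'))  # assumed field, for illustration
    response, designs = get_ampliseq_designs(user, password)
    if response['status'] == '200':
        for design in designs:
            for solution in design.get('DesignSolutions', []):
                print(solution.get('design_name'))  # assumed field, for illustration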
def pre_process():
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file', type=argparse.FileType('r+'))
    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)
    meta = json.load(args.meta_file, parse_float=Decimal)
    meta.update({
        "is_ampliseq": None,
        "primary_bed": None,
        "hotspot_bed": None,
    })
    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = unzip_archive(args.path, args.upload_file)
    else:
        files = [args.upload_file]
    if len(files) == 1 and files[0].endswith('.bed'):
        meta['is_ampliseq'] = False
        meta['primary_bed'] = files[0]
    elif len(files) == 1 and files[0].endswith('.vcf') and meta['hotspot']:
        # Convert the hotspot VCF to BED with tvcutils.
        target_filename = os.path.join(args.path, os.path.basename(files[0])) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += ' --input-vcf %s' % files[0]
        convert_command += ' --output-bed %s' % target_filename
        # TODO: is this just the name or the full path?
        convert_command += ' --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        convert_command += ' --filter-bypass on'
        process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT,
                                   stdout=subprocess.PIPE, shell=True)
        for line in process.communicate()[0].splitlines():
            api.post('log',
                     upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text=line.strip())
        meta['is_ampliseq'] = False
        meta['primary_bed'] = target_filename
    elif "plan.json" in files:
        print("Found AmpliSeq archive")
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")),
                              parse_float=Decimal)
        version, design = ampliseq.handle_versioned_plans(plan_data)
        meta['design'] = design
        plan = design['plan']
        try:
            meta['primary_bed'] = plan['designed_bed']
            meta['secondary_bed'] = plan['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
        except KeyError as err:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            raise
        print(meta)
    else:
        raise ValueError("Upload must be either a valid AmpliSeq export or contain a single BED file.")
    args.meta_file.truncate(0)
    args.meta_file.seek(0)
    json.dump(meta, args.meta_file, cls=JSONEncoder)
    api.patch("contentupload", args.upload_id, meta=meta)
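# The shell-string tvcutils invocation above works but is fragile if paths
# contain spaces. A sketch of the same prepare_hotspots call with an
# argument list (same flags as above; the helper name is ours, and whether
# --reference takes a short name or a full path is still the open TODO).
def convert_hotspot_vcf(vcf_path, bed_path, reference):
    fasta = '/results/referenceLibrary/tmap-f3/%s/%s.fasta' % (reference, reference)
    cmd = ['/usr/local/bin/tvcutils', 'prepare_hotspots',
           '--input-vcf', vcf_path,
           '--output-bed', bed_path,
           '--reference', fasta,
           '--filter-bypass', 'on']
    # List-form args need no shell quoting; stderr is folded into stdout
    # just like the shell=True version above.
    process = subprocess.Popen(cmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
    output = process.communicate()[0]
    return process.returncode, output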
def main():
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')
    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    with open(args.meta_file) as f:
        meta = json.load(f, parse_float=Decimal)
    files = meta.get('pre_process_files')

    # Establish the upload type
    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False
    if len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print "Content: Target regions file in BED format"
        print
    elif len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print "Content: Hotspots file in BED format"
        print
    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot', False) == True:
        print "Content: Hotspots file in VCF format"
        print
        print "Converting hotspot VCF file to BED: %s" % files[0]
        print
        hotspots_bed = os.path.basename(files[0]) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += ' --input-vcf %s' % os.path.join(args.path, os.path.basename(files[0]))
        convert_command += ' --output-bed %s' % os.path.join(args.path, hotspots_bed)
        convert_command += ' --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        convert_command += ' --filter-bypass on'
        p = subprocess.Popen(convert_command, stderr=subprocess.STDOUT,
                             stdout=subprocess.PIPE, shell=True)
        print p.communicate()[0]
        if p.returncode != 0:
            sys.exit(p.returncode)
        meta['is_ampliseq'] = False
    elif "plan.json" in files:
        print "Content: AmpliSeq ZIP"
        print
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")))
        version, design, meta = ampliseq.handle_versioned_plans(plan_data, meta, args.path)
        meta['design'] = design
        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome']
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            api.update_meta(meta, args)
        except KeyError as err:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print "ERROR: Malformed AmpliSeq archive: missing json key " + str(err)
            sys.exit(1)
        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print "ERROR: Target region file %s not present in AmpliSeq archive" % target_regions_bed
            sys.exit(1)
        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print "ERROR: Hotspots file %s not present in AmpliSeq archive" % hotspots_bed
            sys.exit(1)
    else:
        api.patch("contentupload", args.upload_id,
                  status="Error: Unrecognized upload type.")
        print
        print "ERROR: Unrecognized upload type. Upload must be either a valid AmpliSeq ZIP or contain a single BED or VCF file."
        sys.exit(1)

    # === Validate and Register ===
    primary_path = None
    secondary_path = None
    if is_BED_encrypted(meta):
        if target_regions_bed:
            meta['design']['plan']['designed_bed'] = ''
        if hotspots_bed:
            meta['design']['plan']['hotspot_bed'] = ''
        primary_path = ""
        secondary_path = ""
    else:
        if target_regions_bed:
            primary_path = validate(args.upload_id, args.path, meta,
                                    target_regions_bed, 'target regions BED')
        if hotspots_bed:
            secondary_path = validate(args.upload_id, args.path, meta,
                                      hotspots_bed, 'hotspots BED')
    meta["hotspot"] = False
    if target_regions_bed and not primary_path:
        register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
    if hotspots_bed:
        meta["hotspot"] = True
        if not secondary_path:
            register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        try:
            if not is_BED_encrypted(meta):
                if target_regions_bed and not primary_path:
                    primary_path = os.path.join(
                        args.path, meta["reference"] + "/unmerged/detail/" + target_regions_bed)
                if hotspots_bed and not secondary_path:
                    secondary_path = os.path.join(
                        args.path, meta["reference"] + "/unmerged/detail/" + hotspots_bed)
            else:
                run_type = meta['design']['plan'].get('runType', None)
                if run_type == "AMPS_RNA":
                    meta['reference'] = None
            plan_prototype, alignmentargs_override = plan_json(
                meta, args.upload_id, primary_path, secondary_path)
            success, response, content = api.post("plannedexperiment", **plan_prototype)
            if not success:
                api.patch("contentupload", args.upload_id,
                          status="Error: unable to create TS Plan")
                err_content = json.loads(content)
                error_message_array = []
                if 'error' in err_content:
                    error_json = json.loads(str(err_content['error'][3:-2]))
                    for k in error_json:
                        for j in range(len(error_json[k])):
                            err_message = str(error_json[k][j])
                            # Unescape HTML entities left in the API error text.
                            err_message = err_message.replace('&gt;', '>')
                            error_message_array.append(err_message)
                error_messages = ','.join(error_message_array)
                raise Exception(error_messages)
            if alignmentargs_override:
                content_dict = json.loads(content)
                api.patch("plannedexperiment", content_dict["id"],
                          alignmentargs=alignmentargs_override,
                          thumbnailalignmentargs=alignmentargs_override)
        except Exception as err:
            print("ERROR: Could not create plan from this zip: %s." % err)
            raise

    api.update_meta(meta, args)
def main():
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')
    try:
        args = parse.parse_args()
    except IOError as err:
        print("Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    meta_file_handle = open(args.meta_file, 'r')
    meta = json.load(meta_file_handle, parse_float=Decimal)
    meta_file_handle.close()

    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = file_utils.unzip_archive(args.path, args.upload_file)
    elif args.upload_file.endswith('.gz'):
        files = [os.path.basename(args.upload_file[:-3])]
        cmd = 'gzip -dc %s > %s' % (args.upload_file, os.path.join(args.path, files[0]))
        subprocess.call(cmd, shell=True)
    else:
        files = [args.upload_file]

    # Establish the upload type
    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False
    if len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
    elif len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0]) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += ' --input-vcf %s' % os.path.join(args.path, os.path.basename(files[0]))
        convert_command += ' --output-bed %s' % os.path.join(args.path, hotspots_bed)
        # TODO: is this just the name or the full path?
        convert_command += ' --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        convert_command += ' --filter-bypass on'
        process = subprocess.Popen(convert_command, stderr=subprocess.STDOUT,
                                   stdout=subprocess.PIPE, shell=True)
        for line in process.communicate()[0].splitlines():
            api.post('log',
                     upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text=line.strip())
        meta['is_ampliseq'] = False
    elif "plan.json" in files:
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")))
        version, design = ampliseq.handle_versioned_plans(plan_data, meta)
        meta['design'] = design
        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            meta_file_handle = open(args.meta_file, 'w')
            json.dump(meta, meta_file_handle, cls=JSONEncoder)
            meta_file_handle.close()
            api.patch("contentupload", args.upload_id, meta=meta)
        except KeyError as err:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            api.post('log',
                     upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text="Malformed AmpliSeq archive: missing json key " + str(err))
            raise
        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            api.post('log',
                     upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text="Malformed AmpliSeq archive: Target region file %s not present in AmpliSeq archive" % target_regions_bed)
            raise ValueError("Target region file %s not present in AmpliSeq archive" % target_regions_bed)
        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            api.post('log',
                     upload='/rundb/api/v1/contentupload/%s/' % str(args.upload_id),
                     text="Malformed AmpliSeq archive: Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)
            raise ValueError("Hotspots file %s not present in AmpliSeq archive" % hotspots_bed)
    else:
        api.patch("contentupload", args.upload_id,
                  status="Error: Upload must be either a valid AmpliSeq export or contain a single BED or VCF file.")
        raise ValueError("Upload must be either a valid AmpliSeq export or contain a single BED or VCF file.")

    # === Validate and Register ===
    primary_path = None
    secondary_path = None
    if target_regions_bed:
        primary_path = validate(args.upload_id, args.path, meta,
                                target_regions_bed, 'target regions BED')
    if hotspots_bed:
        secondary_path = validate(args.upload_id, args.path, meta,
                                  hotspots_bed, 'hotspots BED')
    meta["hotspot"] = False
    if target_regions_bed and not primary_path:
        register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
    if hotspots_bed:
        meta["hotspot"] = True
        if not secondary_path:
            register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        try:
            if target_regions_bed and not primary_path:
                primary_path = os.path.join(
                    args.path, meta["reference"] + "/unmerged/detail/" + target_regions_bed)
            if hotspots_bed and not secondary_path:
                secondary_path = os.path.join(
                    args.path, meta["reference"] + "/unmerged/detail/" + hotspots_bed)
            plan_prototype = plan_json(meta, primary_path, secondary_path)
            api.post("plannedexperiment", **plan_prototype)
        except Exception as err:
            print("Could not create plan from this zip: %s" % err)

    meta_file_handle = open(args.meta_file, 'w')
    json.dump(meta, meta_file_handle, cls=JSONEncoder)
    meta_file_handle.close()
    api.patch("contentupload", args.upload_id, meta=meta)
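# The gzip handling above shells out to `gzip -dc src > dest`. The same
# decompression can be done in-process with the standard library; a minimal
# sketch with the same naming convention (the helper name is ours).
import gzip
import shutil

def gunzip_upload(upload_file, dest_dir):
    # Strip the .gz suffix and write the decompressed file into dest_dir,
    # mirroring the `gzip -dc %s > %s` pipeline above.
    name = os.path.basename(upload_file[:-3])
    with gzip.open(upload_file, 'rb') as src:
        with open(os.path.join(dest_dir, name), 'wb') as out:
            shutil.copyfileobj(src, out)
    return [name]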
def main():
    parse = argparse.ArgumentParser()
    parse.add_argument('upload_id', type=int)
    parse.add_argument('path')
    parse.add_argument('upload_file')
    parse.add_argument('meta_file')
    try:
        args = parse.parse_args()
    except IOError as err:
        print("ERROR: Input file error: %s" % err)
        parse.print_help()
        sys.exit(1)

    meta_file_handle = open(args.meta_file, 'r')
    meta = json.load(meta_file_handle, parse_float=Decimal)
    meta_file_handle.close()

    print "Uploaded file: " + os.path.basename(args.upload_file)
    is_zip = zipfile.is_zipfile(args.upload_file)
    if is_zip:
        files = file_utils.unzip_archive(args.path, args.upload_file)
        print "Compressed: Yes (zip)"
    elif args.upload_file.endswith('.gz'):
        print "Compressed: Yes (gzip)"
        files = [os.path.basename(args.upload_file[:-3])]
        cmd = 'gzip -dc %s > %s' % (args.upload_file, os.path.join(args.path, files[0]))
        p = subprocess.Popen(cmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
        print p.communicate()[0]
        if p.returncode != 0:
            sys.exit(p.returncode)
    else:
        print "Compressed: No"
        files = [args.upload_file]

    # Establish the upload type
    target_regions_bed = None
    hotspots_bed = None
    meta['is_ampliseq'] = False
    if len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == False:
        target_regions_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print "Content: Target regions file in BED format"
        print
    elif len(files) == 1 and files[0].endswith('.bed') and meta.get('hotspot', False) == True:
        hotspots_bed = os.path.basename(files[0])
        meta['is_ampliseq'] = False
        print "Content: Hotspots file in BED format"
        print
    elif len(files) == 1 and files[0].endswith('.vcf') and meta.get('hotspot', False) == True:
        print "Content: Hotspots file in VCF format"
        print
        print "Converting hotspot VCF file to BED: %s" % files[0]
        print
        hotspots_bed = os.path.basename(files[0]) + '.bed'
        convert_command = '/usr/local/bin/tvcutils prepare_hotspots'
        convert_command += ' --input-vcf %s' % os.path.join(args.path, os.path.basename(files[0]))
        convert_command += ' --output-bed %s' % os.path.join(args.path, hotspots_bed)
        convert_command += ' --reference /results/referenceLibrary/tmap-f3/%s/%s.fasta' % (
            meta["reference"], meta["reference"])
        convert_command += ' --filter-bypass on'
        p = subprocess.Popen(convert_command, stderr=subprocess.STDOUT,
                             stdout=subprocess.PIPE, shell=True)
        print p.communicate()[0]
        if p.returncode != 0:
            sys.exit(p.returncode)
        meta['is_ampliseq'] = False
    elif "plan.json" in files:
        print "Content: AmpliSeq ZIP"
        print
        meta['is_ampliseq'] = True
        plan_data = json.load(open(os.path.join(args.path, "plan.json")))
        version, design, meta = ampliseq.handle_versioned_plans(plan_data, meta)
        meta['design'] = design
        try:
            target_regions_bed = design['plan']['designed_bed']
            hotspots_bed = design['plan']['hotspot_bed']
            if not meta.get("reference", None):
                meta['reference'] = design['genome'].lower()
            if 'design_name' in plan_data:
                meta['description'] = design['design_name']
            meta_file_handle = open(args.meta_file, 'w')
            json.dump(meta, meta_file_handle, cls=JSONEncoder)
            meta_file_handle.close()
            api.patch("contentupload", args.upload_id, meta=meta)
        except KeyError as err:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print "ERROR: Malformed AmpliSeq archive: missing json key " + str(err)
            sys.exit(1)
        if target_regions_bed and target_regions_bed not in files:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print "ERROR: Target region file %s not present in AmpliSeq archive" % target_regions_bed
            sys.exit(1)
        if hotspots_bed and hotspots_bed not in files:
            api.patch("contentupload", args.upload_id,
                      status="Error: malformed AmpliSeq archive")
            print "ERROR: Hotspots file %s not present in AmpliSeq archive" % hotspots_bed
            sys.exit(1)
    else:
        api.patch("contentupload", args.upload_id,
                  status="Error: Unrecognized upload type.")
        print
        print "ERROR: Unrecognized upload type. Upload must be either a valid AmpliSeq ZIP or contain a single BED or VCF file."
        sys.exit(1)

    # === Validate and Register ===
    primary_path = None
    secondary_path = None
    if target_regions_bed:
        primary_path = validate(args.upload_id, args.path, meta,
                                target_regions_bed, 'target regions BED')
    if hotspots_bed:
        secondary_path = validate(args.upload_id, args.path, meta,
                                  hotspots_bed, 'hotspots BED')
    meta["hotspot"] = False
    if target_regions_bed and not primary_path:
        register_bed_file(args.upload_id, args.path, meta, target_regions_bed)
    if hotspots_bed:
        meta["hotspot"] = True
        if not secondary_path:
            register_bed_file(args.upload_id, args.path, meta, hotspots_bed)

    if meta['is_ampliseq']:
        try:
            if target_regions_bed and not primary_path:
                primary_path = os.path.join(
                    args.path, meta["reference"] + "/unmerged/detail/" + target_regions_bed)
            if hotspots_bed and not secondary_path:
                secondary_path = os.path.join(
                    args.path, meta["reference"] + "/unmerged/detail/" + hotspots_bed)
            plan_prototype = plan_json(meta, args.upload_id, primary_path, secondary_path)
            success, response, content = api.post("plannedexperiment", **plan_prototype)
            if not success:
                api.patch("contentupload", args.upload_id,
                          status="Error: unable to create TS Plan")
                raise Exception("Plan creation API request failed.")
        except Exception as err:
            print("ERROR: Could not create plan from this zip: %s" % err)
            raise

    meta_file_handle = open(args.meta_file, 'w')
    json.dump(meta, meta_file_handle, cls=JSONEncoder)
    meta_file_handle.close()
    api.patch("contentupload", args.upload_id, meta=meta)
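# These scripts load metadata with parse_float=Decimal and dump it with
# cls=JSONEncoder, so the encoder in use must know how to serialize Decimal.
# A minimal stand-in showing what that encoder needs to do (hypothetical
# reimplementation, not the project's actual JSONEncoder class):
import json
from decimal import Decimal

class DecimalJSONEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, Decimal):
            # float(obj) is lossy; use str(obj) instead to keep exact precision.
            return float(obj)
        return json.JSONEncoder.default(self, obj)

# Round-trip mirroring the scripts above:
#   meta = json.load(open('meta.json'), parse_float=Decimal)
#   json.dump(meta, open('meta.json', 'w'), cls=DecimalJSONEncoder)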