def new_reference_genome(reference_args, url=None, reference_file=None, callback_task=None, reference_mask_filename=None):
    """Create a queued ReferenceGenome record and kick off its installation.

    reference_args -- field dict used to construct the ReferenceGenome;
                      must contain 'short_name'.
    url            -- if given, the reference is fetched by download task.
    reference_file -- if given (and no url), installed via celery task.
    callback_task  -- optional task linked to run after install.
    reference_mask_filename -- forwarded to the download path only.

    Returns the saved ReferenceGenome.  Raises Exception when a genome with
    the same short name and current TMAP index version already exists, or
    when neither a url nor a file was supplied.
    """
    # Refuse duplicates: same short name at the current index version.
    if ReferenceGenome.objects.filter(
            short_name=reference_args['short_name'],
            index_version=settings.TMAP_VERSION):
        raise Exception("Failed - Genome %s already exists" % reference_args['short_name'])

    genome = ReferenceGenome(**reference_args)
    genome.enabled = False
    genome.status = "queued"
    genome.save()

    # Dispatch the appropriate async installer; note the record above is
    # saved even when no source was given (matches existing behavior).
    if url:
        pending = start_reference_download(
            url, genome, callback_task,
            reference_mask_filename=reference_mask_filename)
    elif reference_file:
        pending = tasks.install_reference.apply_async(
            ((reference_file, None), genome.id), link=callback_task)
    else:
        raise Exception('Failed creating new genome reference: No source file')

    genome.celery_task_id = pending.task_id
    genome.save()
    return genome
def new_reference_genome(reference_args, url=None, reference_file=None, callback_task=None, reference_mask_filename=None):
    """Register a new reference genome and schedule its install task.

    The genome row is created in 'queued' state (disabled) before any task
    is started; the chosen task's id is then stored on the row.  Exactly one
    of `url` / `reference_file` must be provided.
    """
    short_name = reference_args['short_name']
    # A genome with this short name already indexed at the current TMAP
    # version is a hard conflict.
    duplicates = ReferenceGenome.objects.filter(
        short_name=short_name, index_version=settings.TMAP_VERSION)
    if duplicates:
        raise Exception("Failed - Genome %s already exists" % short_name)

    rec = ReferenceGenome(**reference_args)
    rec.enabled = False
    rec.status = "queued"
    rec.save()

    if url:
        async_result = start_reference_download(
            url,
            rec,
            callback_task,
            reference_mask_filename=reference_mask_filename,
        )
    elif reference_file:
        async_result = tasks.install_reference.apply_async(
            ((reference_file, None), rec.id),
            link=callback_task,
        )
    else:
        # NOTE: the record has already been saved at this point; that
        # matches the pre-existing behavior of this function.
        raise Exception('Failed creating new genome reference: No source file')

    rec.celery_task_id = async_result.task_id
    rec.save()
    return rec
def search_for_genomes():
    """Scan the reference library on disk and sync it with the database.

    Walks /results/referenceLibrary/<tmap-version>/<short_name>/, reads each
    genome's info.txt, and either updates the existing ReferenceGenome row
    (when the on-disk index version differs) or creates a new one.
    """
    def set_common(dest, genome_dict, ref_dir, lib):
        # Copy the info.txt fields onto the model; fall back to a
        # "missing info.txt" status when expected keys are absent.
        try:
            dest.name = genome_dict["genome_name"]
            dest.version = genome_dict["genome_version"]
            dest.index_version = genome_dict["index_version"]
            dest.reference_path = os.path.join(ref_dir, dest.index_version, dest.short_name)
        except KeyError:
            # FIX: was a bare `except:` which hid every failure, including
            # programming errors; only missing dict keys are expected here.
            dest.name = lib
            dest.status = "missing info.txt"
        return dest

    ref_dir = '/results/referenceLibrary'
    # Collect the per-index-version folders (e.g. "tmap-f3").
    lib_versions = []
    for folder in os.listdir(ref_dir):
        if os.path.isdir(os.path.join(ref_dir, folder)) and folder.lower().startswith("tmap"):
            lib_versions.append(folder)
    logger.debug("Reference genome scanner found %s" % ",".join(lib_versions))

    for lib_version in lib_versions:
        if os.path.exists(os.path.join(ref_dir, lib_version)):
            libs = os.listdir(os.path.join(ref_dir, lib_version))
            for lib in libs:
                genome_info_text = os.path.join(ref_dir, lib_version, lib, lib + ".info.txt")
                genome_dict = _read_genome_info(genome_info_text)
                #TODO: we have to take into account the genomes that are queue for creation of in creation
                if genome_dict:
                    # Here we trust that the path the genome is in is also
                    # the short name.  Most recent index version wins.
                    existing_reference = ReferenceGenome.objects.filter(
                        short_name=lib).order_by("-index_version")[:1]
                    if existing_reference:
                        rg = existing_reference[0]
                        if rg.index_version != genome_dict["index_version"]:
                            logger.debug("Updating genome status to 'found' for %s id=%d index=%s" % (
                                str(rg), rg.id, rg.index_version))
                            rg.status = "complete"
                            rg = set_common(rg, genome_dict, ref_dir, lib)
                            rg.save()
                    else:
                        logger.info("Found new genome %s index=%s" % (
                            lib, genome_dict["genome_version"]))
                        # The reference was not found; add it to the db.
                        rg = ReferenceGenome()
                        rg.short_name = lib
                        rg.date = datetime.datetime.now()
                        rg.status = "complete"
                        rg.enabled = True
                        rg.index_version = ""
                        rg.version = ""
                        rg.name = ""
                        rg = set_common(rg, genome_dict, ref_dir, lib)
                        rg.save()
                        logger.info("Created new reference genome %s id=%d" % (
                            str(rg), rg.id))
def new_reference_download(url, reference_args):
    """Create a disabled ReferenceGenome in 'downloading' state and start
    fetching it from `url`; returns the download task's async result."""
    rec = ReferenceGenome(**reference_args)
    rec.enabled = False
    rec.status = "downloading"
    rec.save()
    return start_reference_download(url, rec)
def new_genome(request):
    """Create a new reference genome from an uploaded FASTA/ZIP or a URL.

    POST: validates the form and the uploaded file (size, extension,
    archive contents), creates a ReferenceGenome row, stages the file in a
    temp dir and queues celery tasks (unzip/copy, then TMAP index build).
    Responds with a JSON status payload.
    GET: renders the "new genome" modal.
    """
    if request.method == "POST":
        # parse the data sent in
        # required
        name = request.POST.get('name', False)
        short_name = request.POST.get('short_name', False)
        fasta = request.POST.get('target_file', False)
        version = request.POST.get('version', "")
        notes = request.POST.get('notes', "")
        # optional
        read_exclude_length = request.POST.get('read_exclude_length', False)
        # URL download
        url = request.POST.get('url', False)

        reference_path = os.path.join(settings.TEMP_PATH, fasta)
        why_delete = ""

        # if any of those were false send back a failed message
        if not all((name, short_name, fasta)):
            return render_to_json({"status": "Form validation failed", "error": True})

        if not set(short_name).issubset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"):
            return render_to_json({"status": "The short name has invalid characters. The valid values are letters, numbers, and underscores.", "error": True})

        # BUGFIX: these were previously assigned only inside the
        # `if not url:` branch, so a URL-based submission crashed with a
        # NameError at `if is_zip:` below.  Initialize them up front; a URL
        # submission takes the copy_reference path.
        is_zip = False
        target_fasta_file = None

        #TODO: check to make sure the zip file only has one fasta or fa
        if not url:
            # check to ensure the size on the OS is the same as reported
            reported_file_size = request.POST.get('reported_file_size', False)
            try:
                uploaded_file_size = str(os.path.getsize(reference_path))
            except OSError:
                return render_to_json({"status": "The FASTA temporary files was not found", "error": True})
            if reported_file_size != uploaded_file_size:
                why_delete = "The file you uploaded differs from the expected size. This is due to an error uploading."
            if not (fasta.lower().endswith(".fasta") or fasta.lower().endswith(".zip")):
                why_delete = "The file you uploaded does not have a .fasta or .zip extension. It must be a plain text fasta file or a Zip compressed fasta."

            is_zip = zipfile.is_zipfile(reference_path)
            if is_zip:
                zip_file = zipfile.ZipFile(reference_path, 'r')
                files = zip_file.namelist()
                # MAC OS zip is being compressed with __MACOSX folder Ex: '__MACOSX/', '__MACOSX/._contigs_2.fasta'.
                # Filter out those files and upload only the FASTA file.
                files = [x for x in files if 'MACOSX' not in x]
                zip_file.close()
            else:
                files = [fasta]

            # Exactly one fasta is allowed per upload.
            fasta_files = [f for f in files if f.endswith('.fa') or f.endswith('.fasta')]
            if len(fasta_files) != 1:
                why_delete = "Error: upload must contain exactly one fasta file"
            else:
                target_fasta_file = fasta_files[0]

        if why_delete:
            # Bad upload: remove the temp file and report the reason.
            try:
                os.remove(reference_path)
            except OSError:
                why_delete += " The FASTA file could not be deleted."
            logger.warning("User uploaded bad fasta file: " + str(why_delete))
            return render_to_json({"status": why_delete, "error": True})

        # Make a genome ref object; reject duplicates at this index version.
        if ReferenceGenome.objects.filter(short_name=short_name, index_version=settings.TMAP_VERSION):
            return render_to_json({"status": "Failed - Genome with this short name and index version already exist.", "error": True})

        ref_genome = ReferenceGenome()
        ref_genome.name = name
        ref_genome.short_name = short_name
        ref_genome.version = version
        ref_genome.notes = notes
        ref_genome.status = "preprocessing"
        ref_genome.enabled = False
        ref_genome.index_version = settings.TMAP_VERSION
        ref_genome.save()
        logger.debug("Created new reference: %d/%s" % (ref_genome.pk, ref_genome))

        # Stage the upload in its own temp directory and track it.
        temp_dir = tempfile.mkdtemp(suffix=short_name, dir=settings.TEMP_PATH)
        temp_upload_path = os.path.join(temp_dir, fasta)
        os.chmod(temp_dir, 0o777)  # same mode as before (0777), py3-safe literal
        os.rename(reference_path, temp_upload_path)
        monitor = FileMonitor(
            local_dir=temp_dir,
            name=fasta
        )
        monitor.save()
        ref_genome.file_monitor = monitor
        ref_genome.reference_path = temp_upload_path
        ref_genome.save()

        # Queue the staging task with the index build linked to run after it.
        index_task = tasks.build_tmap_index.subtask((ref_genome.id,), immutable=True)
        if is_zip:
            result = tasks.unzip_reference.apply_async(
                args=(ref_genome.id, target_fasta_file),
                link=index_task
            )
        else:
            result = tasks.copy_reference.apply_async(
                args=(ref_genome.id,),
                link=index_task
            )
        ref_genome.status = "queued"
        ref_genome.celery_task_id = result.task_id
        ref_genome.save()

        return render_to_json({"status": "The genome index is being created. This might take a while, check the status on the references tab. \
You are being redirected there now.", "error": False})
    elif request.method == "GET":
        ctx = RequestContext(request, {})
        return render_to_response("rundb/configure/modal_references_new_genome.html", context_instance=ctx)
def new_genome(request):
    """Create a new reference genome from an uploaded FASTA/ZIP or a URL.

    POST: validates the form and the uploaded file (size, recognized FASTA
    extension, archive contents), creates a ReferenceGenome row, stages the
    file and queues celery tasks (unzip/copy linked to the TMAP index
    build).  Responds with a JSON status payload.
    GET: renders the "new genome" modal.
    """
    def is_fasta(filename):
        # Recognized plain-text FASTA extensions.
        ext = os.path.splitext(filename)[1]
        return ext.lower() in ['.fasta', '.fas', '.fa', '.seq']

    if request.method == "POST":
        # parse the data sent in
        # required
        name = request.POST.get('name', False)
        short_name = request.POST.get('short_name', False)
        fasta = request.POST.get('target_file', False)
        version = request.POST.get('version', "")
        notes = request.POST.get('notes', "")
        # optional
        read_exclude_length = request.POST.get('read_exclude_length', False)
        # URL download
        url = request.POST.get('url', False)

        reference_path = os.path.join(settings.TEMP_PATH, fasta)
        why_delete = ""

        # if any of those were false send back a failed message
        if not all((name, short_name, fasta)):
            return render_to_json({
                "status": "Form validation failed",
                "error": True
            })
        if not set(short_name).issubset(
                "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
        ):
            return render_to_json({
                "status": "The short name has invalid characters. The valid values are letters, numbers, and underscores.",
                "error": True
            })

        # BUGFIX: these were previously assigned only inside the
        # `if not url:` branch, so a URL-based submission crashed with a
        # NameError at `if is_zip:` below.  Initialize them up front; a URL
        # submission takes the copy_reference path.
        is_zip = False
        target_fasta_file = None

        #TODO: check to make sure the zip file only has one fasta or fa
        if not url:
            # check to ensure the size on the OS is the same as reported
            reported_file_size = request.POST.get('reported_file_size', False)
            try:
                uploaded_file_size = str(os.path.getsize(reference_path))
            except OSError:
                return render_to_json({
                    "status": "The FASTA temporary files was not found",
                    "error": True
                })
            if reported_file_size != uploaded_file_size:
                why_delete = "The file you uploaded differs from the expected size. This is due to an error uploading."
            if not (is_fasta(fasta) or fasta.lower().endswith(".zip")):
                why_delete = "The file you uploaded does not have a FASTA or ZIP extension. It must be a plain text a Zip compressed fasta file."

            is_zip = zipfile.is_zipfile(reference_path)
            if is_zip:
                zip_file = zipfile.ZipFile(reference_path, 'r')
                files = zip_file.namelist()
                # MAC OS zip is being compressed with __MACOSX folder Ex: '__MACOSX/', '__MACOSX/._contigs_2.fasta'.
                # Filter out those files and upload only the FASTA file.
                files = [x for x in files if 'MACOSX' not in x]
                zip_file.close()
            else:
                files = [fasta]

            # Exactly one fasta is allowed per upload.
            fasta_files = [f for f in files if is_fasta(f)]
            if len(fasta_files) != 1:
                why_delete = "Error: upload must contain exactly one fasta file"
            else:
                target_fasta_file = fasta_files[0]

        if why_delete:
            # Bad upload: remove the temp file and report the reason.
            try:
                os.remove(reference_path)
            except OSError:
                why_delete += " The FASTA file could not be deleted."
            logger.warning("User uploaded bad fasta file: " + str(why_delete))
            return render_to_json({"status": why_delete, "error": True})

        # Make a genome ref object; reject duplicates at this index version.
        if ReferenceGenome.objects.filter(short_name=short_name, index_version=settings.TMAP_VERSION):
            return render_to_json({
                "status": "Failed - Genome with this short name and index version already exist.",
                "error": True
            })

        ref_genome = ReferenceGenome()
        ref_genome.name = name
        ref_genome.short_name = short_name
        ref_genome.version = version
        ref_genome.notes = notes
        ref_genome.status = "preprocessing"
        ref_genome.enabled = False
        ref_genome.index_version = settings.TMAP_VERSION
        ref_genome.save()
        logger.debug("Created new reference: %d/%s" % (ref_genome.pk, ref_genome))

        # Stage the upload in its own temp directory and track it.
        temp_dir = tempfile.mkdtemp(suffix=short_name, dir=settings.TEMP_PATH)
        temp_upload_path = os.path.join(temp_dir, fasta)
        os.chmod(temp_dir, 0o777)  # same mode as before (0777), py3-safe literal
        os.rename(reference_path, temp_upload_path)
        monitor = FileMonitor(local_dir=temp_dir, name=fasta)
        monitor.save()
        ref_genome.file_monitor = monitor
        ref_genome.reference_path = temp_upload_path
        ref_genome.save()

        # Queue the staging task with the index build linked to run after it.
        index_task = tasks.build_tmap_index.subtask((ref_genome.id, ), immutable=True)
        if is_zip:
            result = tasks.unzip_reference.apply_async(
                args=(ref_genome.id, target_fasta_file), link=index_task)
        else:
            result = tasks.copy_reference.apply_async(args=(ref_genome.id, ), link=index_task)
        ref_genome.status = "queued"
        ref_genome.celery_task_id = result.task_id
        ref_genome.save()

        return render_to_json({
            "status": "The genome index is being created. This might take a while, check the status on the references tab. \
You are being redirected there now.",
            "error": False
        })
    elif request.method == "GET":
        ctx = RequestContext(request, {})
        return render_to_response(
            "rundb/configure/modal_references_new_genome.html",
            context_instance=ctx)
def new_genome(request):
    """Create a new genome via the ionJobServer XML-RPC service.

    POST: validates the form and the uploaded file, creates a
    ReferenceGenome row, then asks ionJobServer (over XML-RPC) to build the
    TMAP index; the row is deleted again if the RPC fails.  Responds with a
    JSON status payload.
    GET: renders the "new genome" modal.
    """
    if request.method == "POST":
        # parse the data sent in
        # required
        name = request.POST.get('name', False)
        short_name = request.POST.get('short_name', False)
        fasta = request.POST.get('target_file', False)
        version = request.POST.get('version', False)
        notes = request.POST.get('notes', "")
        # optional
        read_sample_size = request.POST.get('read_sample_size', False)
        read_exclude_length = request.POST.get('read_exclude_length', False)
        # URL download
        url = request.POST.get('url', False)

        # NOTE: plain concatenation kept on purpose — REFERENCE_LIBRARY_TEMP_DIR
        # presumably ends with a path separator; verify before switching to
        # os.path.join.
        reference_path = REFERENCE_LIBRARY_TEMP_DIR + fasta
        why_delete = ""

        # if any of those were false send back a failed message
        if not all((name, short_name, fasta, version)):
            return render_to_json({"status": "Form validation failed", "error": True})

        if not set(short_name).issubset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"):
            return render_to_json({"status": "The short name has invalid characters. The valid values are letters, numbers, and underscores.", "error": True})

        #TODO: check to make sure the zip file only has one fasta or fa
        if not url:
            # check to ensure the size on the OS is the same as reported
            reported_file_size = request.POST.get('reported_file_size', False)
            try:
                uploaded_file_size = str(os.path.getsize(reference_path))
            except OSError:
                return render_to_json({"status": "The FASTA temporary files was not found", "error": True})
            if reported_file_size != uploaded_file_size:
                why_delete = "The file you uploaded differs from the expected size. This is due to an error uploading."
            if not (fasta.lower().endswith(".fasta") or fasta.lower().endswith(".zip")):
                why_delete = "The file you uploaded does not have a .fasta or .zip extension. It must be a plain text fasta file or a Zip compressed fasta."

        if why_delete:
            # Bad upload: remove the temp file and report the reason.
            try:
                os.remove(reference_path)
            except OSError:
                why_delete += " The FASTA file could not be deleted."
            return render_to_json({"status": why_delete, "error": True})

        # Make a genome ref object; reject duplicates at this index version.
        if ReferenceGenome.objects.filter(short_name=short_name, index_version=settings.TMAP_VERSION):
            return render_to_json({"status": "Failed - Genome with this short name and index version already exist.", "error": True})

        ref_genome = ReferenceGenome()
        ref_genome.name = name
        ref_genome.short_name = short_name
        ref_genome.version = version
        ref_genome.date = datetime.datetime.now()
        ref_genome.notes = notes
        ref_genome.status = "queued"
        ref_genome.enabled = False
        ref_genome.index_version = settings.TMAP_VERSION

        # Before the object is saved, ping the xml-rpc server to see if it
        # is alive (uptime is just a cheap no-op call).
        try:
            conn = client.connect(JOBSERVER_HOST, settings.JOBSERVER_PORT)
            conn.uptime()
            logger.debug('Connected to ionJobserver process.')
        except (socket.error, xmlrpclib.Fault):
            return render_to_json({"status": "Unable to connect to ionJobserver process. You may need to restart ionJobserver", "error": True})

        # This object must be saved before the tmap call is made.
        ref_genome.save()
        logger.debug('Saved ReferenceGenome %s' % ref_genome.__dict__)

        # Kick off the anaserve tmap xmlrpc call.
        import traceback
        try:
            conn = client.connect(JOBSERVER_HOST, settings.JOBSERVER_PORT)
            tmap_bool, tmap_status = conn.tmap(str(ref_genome.id), fasta, short_name,
                                               name, version, read_sample_size,
                                               read_exclude_length, settings.TMAP_VERSION)
            logger.debug('ionJobserver process reported %s %s' % (tmap_bool, tmap_status))
        except (socket.error, xmlrpclib.Fault):
            # Delete the genome object, because it was not successful.
            ref_genome.delete()
            return render_to_json({"status": "Error with index creation", "error": traceback.format_exc()})

        if not tmap_bool:
            ref_genome.delete()
            return render_to_json({"status": tmap_status, "error": True})

        return render_to_json({"status": "The genome index is being created. This might take a while, check the status on the references tab. \
You are being redirected there now.", "error": False})
    elif request.method == "GET":
        ctx = RequestContext(request, {})
        return render_to_response("rundb/configure/modal_references_new_genome.html", context_instance=ctx)
def new_genome(request):
    """Create a new genome via the ionJobServer XML-RPC service.

    POST: validates the form and the uploaded file, creates a
    ReferenceGenome row, then asks ionJobServer (over XML-RPC) to build the
    TMAP index; the row is deleted again if the RPC fails.  Responds with a
    JSON status payload.
    GET: renders the "new genome" modal.
    """
    if request.method == "POST":
        # parse the data sent in
        # required
        name = request.POST.get('name', False)
        short_name = request.POST.get('short_name', False)
        fasta = request.POST.get('target_file', False)
        version = request.POST.get('version', False)
        notes = request.POST.get('notes', "")
        # optional
        read_exclude_length = request.POST.get('read_exclude_length', False)
        # URL download
        url = request.POST.get('url', False)

        # NOTE: plain concatenation kept on purpose — REFERENCE_LIBRARY_TEMP_DIR
        # presumably ends with a path separator; verify before switching to
        # os.path.join.
        reference_path = REFERENCE_LIBRARY_TEMP_DIR + fasta
        why_delete = ""

        # if any of those were false send back a failed message
        if not all((name, short_name, fasta, version)):
            return render_to_json({
                "status": "Form validation failed",
                "error": True
            })
        if not set(short_name).issubset(
                "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
        ):
            return render_to_json({
                "status": "The short name has invalid characters. The valid values are letters, numbers, and underscores.",
                "error": True
            })

        #TODO: check to make sure the zip file only has one fasta or fa
        if not url:
            # check to ensure the size on the OS is the same as reported
            reported_file_size = request.POST.get('reported_file_size', False)
            try:
                uploaded_file_size = str(os.path.getsize(reference_path))
            except OSError:
                return render_to_json({
                    "status": "The FASTA temporary files was not found",
                    "error": True
                })
            if reported_file_size != uploaded_file_size:
                why_delete = "The file you uploaded differs from the expected size. This is due to an error uploading."
            if not (fasta.lower().endswith(".fasta") or fasta.lower().endswith(".zip")):
                why_delete = "The file you uploaded does not have a .fasta or .zip extension. It must be a plain text fasta file or a Zip compressed fasta."

        if why_delete:
            # Bad upload: remove the temp file and report the reason.
            try:
                os.remove(reference_path)
            except OSError:
                why_delete += " The FASTA file could not be deleted."
            return render_to_json({"status": why_delete, "error": True})

        # Make a genome ref object; reject duplicates at this index version.
        if ReferenceGenome.objects.filter(short_name=short_name, index_version=settings.TMAP_VERSION):
            return render_to_json({
                "status": "Failed - Genome with this short name and index version already exist.",
                "error": True
            })

        ref_genome = ReferenceGenome()
        ref_genome.name = name
        ref_genome.short_name = short_name
        ref_genome.version = version
        ref_genome.date = datetime.datetime.now()
        ref_genome.notes = notes
        ref_genome.status = "queued"
        ref_genome.enabled = False
        ref_genome.index_version = settings.TMAP_VERSION

        # Before the object is saved, ping the xml-rpc server to see if it
        # is alive (uptime is just a cheap no-op call).
        try:
            conn = client.connect(JOBSERVER_HOST, settings.JOBSERVER_PORT)
            conn.uptime()
            logger.debug('Connected to ionJobserver process.')
        except (socket.error, xmlrpclib.Fault):
            return render_to_json({
                "status": "Unable to connect to ionJobserver process. You may need to restart ionJobserver",
                "error": True
            })

        # This object must be saved before the tmap call is made.
        ref_genome.save()
        logger.debug('Saved ReferenceGenome %s' % ref_genome.__dict__)

        # Kick off the anaserve tmap xmlrpc call.
        import traceback
        try:
            conn = client.connect(JOBSERVER_HOST, settings.JOBSERVER_PORT)
            tmap_bool, tmap_status = conn.tmap(str(ref_genome.id), fasta,
                                               short_name, name, version,
                                               read_exclude_length,
                                               settings.TMAP_VERSION)
            logger.debug('ionJobserver process reported %s %s' % (tmap_bool, tmap_status))
        except (socket.error, xmlrpclib.Fault):
            # Delete the genome object, because it was not successful.
            ref_genome.delete()
            return render_to_json({
                "status": "Error with index creation",
                "error": traceback.format_exc()
            })

        if not tmap_bool:
            ref_genome.delete()
            return render_to_json({"status": tmap_status, "error": True})

        return render_to_json({
            "status": "The genome index is being created. This might take a while, check the status on the references tab. \
You are being redirected there now.",
            "error": False
        })
    elif request.method == "GET":
        ctx = RequestContext(request, {})
        return render_to_response(
            "rundb/configure/modal_references_new_genome.html",
            context_instance=ctx)