def handle(self, *args, **options):
    """
    Create the bed file, and its index, for every reference that does
    not have one yet.

    For each reference a progress line is written to stdout and to both
    loggers; references whose bed file already exists are skipped.
    """
    self.stdout.write("Stating")
    utils = Utils()
    software = Software()
    for position, reference in enumerate(Reference.objects.all(), start=1):
        bed_path = reference.get_reference_bed(TypePath.MEDIA_ROOT)
        progress = "{} {}: ".format(position,
                                    reference.reference_genbank_name)
        self.stdout.write(progress)
        self.logger_production.info(progress)
        self.logger_debug.info(progress)

        if not os.path.exists(bed_path):
            ### create bed and index for genbank
            utils.from_genbank_to_bed(
                reference.get_reference_gbk(TypePath.MEDIA_ROOT), bed_path)
            software.create_index_files_from_igv_tools(bed_path)
            self.stdout.write("created\n")
        else:
            self.stdout.write("already exist\n")
    self.stdout.write("End")
def handle(self, *args, **options):
    """
    Run the second stage (snippy, coverage, freebayes) for one project
    sample.

    Reads 'project_sample_id' and 'user_id' from the options. When no
    user id is given the owner of the project is used. Unknown ids are
    reported on stdout.
    """
    software = Software()
    project_sample_id = options['project_sample_id']
    user_id = options['user_id']

    start_message = "Starting for project_sample_id: {}".format(
        project_sample_id)
    self.stdout.write(start_message)
    self.logger_production.info(start_message)
    self.logger_debug.info(start_message)

    try:
        project_sample = ProjectSample.objects.get(pk=project_sample_id)
        ## default to the project owner when the user is not specified
        user = (project_sample.project.owner if user_id is None
                else User.objects.get(pk=user_id))
        software.process_second_stage_snippy_coverage_freebayes(
            project_sample, user)
        self.stdout.write("End")
    except ProjectSample.DoesNotExist:
        self.stdout.write(
            "Error: ProjectSample id '{}' does not exist.".format(
                project_sample_id))
    except User.DoesNotExist:
        self.stdout.write(
            "Error: User id '{}' does not exist.".format(user_id))
def handle(self, *args, **options):
    """
    Create a new reference for snippy for every project sample that is
    ready to process, in all projects that are not deleted.
    """
    software = Software()
    ## lazy filter, projects are still visited in queryset order
    active_projects = (project for project in Project.objects.all()
                       if not project.is_deleted)
    for project in active_projects:
        self.stdout.write("Processing project: {}".format(project.name))
        for project_sample in project.project_samples.all():
            if project_sample.get_is_ready_to_proccess():
                self.stdout.write("Processing sample: {}".format(
                    project_sample.sample.name))
                software.creat_new_reference_to_snippy(project_sample)
        self.stdout.write("End")
    self.stdout.write("Finished")
def upload_default_files(self):
    """
    Upload the default file and make sure its abricate database exists.
    """
    ## imported here, as in the rest of this method's history, to keep
    ## these modules out of the module level imports
    from manage_virus.uploadFiles import UploadFiles
    from utils.software import Software

    upload_files = UploadFiles()

    ## get version and path of the file to upload (not in test mode)
    version, path = upload_files.get_file_to_upload(False)

    ## upload
    uploaded = upload_files.upload_file(version, path)
    if uploaded is None:
        return

    ## create the abricate database, if it is not there yet
    software = Software()
    if software.is_exist_database_abricate(uploaded.abricate_name):
        return
    software.create_database_abricate(uploaded.abricate_name, uploaded.path)
def handle(self, *args, **options):
    """
    Run fastq quality, trimmomatic and species identification for one
    sample.

    Reads 'sample_id' and 'user_id' from the options. When no user id is
    given the owner of the sample is used. Unknown ids are reported on
    stdout.
    """
    software = Software()
    sample_id = options['sample_id']
    user_id = options['user_id']

    start_message = "Starting for sample_id: {}".format(sample_id)
    self.stdout.write(start_message)
    self.logger_production.info(start_message)
    self.logger_debug.info(start_message)

    try:
        sample = Sample.objects.get(pk=sample_id)
        ## default to the sample owner when the user is not specified
        user = (sample.owner if user_id is None
                else User.objects.get(pk=user_id))
        b_return = software.run_fastq_and_trimmomatic_and_identify_species(
            sample, user)
        self.stdout.write("Resulting: {}".format(b_return))
        self.stdout.write("End")
    except Sample.DoesNotExist:
        self.stdout.write(
            "Error: Sample id '{}' does not exist.".format(sample_id))
    except User.DoesNotExist:
        self.stdout.write(
            "Error: User id '{}' does not exist.".format(user_id))
class ReferenceForm(forms.ModelForm):
    """
    Reference form, name, isolate_name and others

    The form must be created with a 'request' keyword argument; the
    request is used to check the name against the references of the
    current user and to store the number of locus in the session.
    """
    utils = Utils()
    software = Software()
    error_css_class = 'error'

    class Meta:
        model = Reference
        # specify what fields should be used in this form.
        fields = ('name', 'isolate_name', 'reference_fasta',
                  'reference_genbank')

    def __init__(self, *args, **kwargs):
        """
        Set labels, help texts and required flags, and build the
        crispy-forms layout.
        """
        self.request = kwargs.pop('request')
        super(ReferenceForm, self).__init__(*args, **kwargs)

        field_text = [
            # (field_name, Field title label, Detailed field description, required)
            ('name', 'Name', 'Regular name for this reference', True),
            ('isolate_name', 'Isolate name',
             'Isolate name for this reference', False),
            ('reference_fasta', 'Reference (fasta)',
             'Reference file in fasta format', True),
            ('reference_genbank', 'Reference (genBank)',
             "Reference file in genBank format.<br> "
             "Locus designations in the multi-GenBank file must have the "
             "same name as in the respective fasta file.<br> "
             "If you do not upload a Genbank file, INSaFLU will annotate "
             "the upload fasta file for you",
             False),
        ]
        for field_name, label, help_text, required in field_text:
            field = self.fields[field_name]
            field.label = label
            field.help_text = help_text
            field.required = required

        self.helper = FormHelper()
        self.helper.form_method = 'POST'
        self.helper.layout = Layout(
            Div(Div('name', css_class="col-sm-3"),
                Div('isolate_name', css_class="col-sm-7"),
                css_class='row'),
            Div('reference_fasta', css_class='show-for-sr'),
            Div('reference_genbank', css_class='show-for-sr'),
            ButtonHolder(
                Submit('save', 'Save', css_class='btn-primary'),
                Button('cancel', 'Cancel', css_class='btn-secondary',
                       onclick='window.location.href="{}"'.format(
                           reverse('references')))))

    def _copy_upload_to_temp_file(self, uploaded_file, prefix, temp_paths):
        """
        Copy an uploaded file to a temporary file with unix line endings.

        The path is appended to 'temp_paths' as soon as the file exists,
        so the caller can always remove it, even if the copy fails.
        :return: path of the temporary file
        """
        with NamedTemporaryFile(prefix=prefix, delete=False) as handle:
            temp_paths.append(handle.name)
            handle.write(uploaded_file.read())
        self.software.dos_2_unix(handle.name)
        return handle.name

    def clean(self):
        """
        Clean all together because it's necessary to compare the genbank
        and fasta files

        Validation errors are attached to the fields with add_error().
        Every temporary file created here is removed before returning,
        whatever the outcome.
        :return: the cleaned data
        """
        cleaned_data = super(ReferenceForm, self).clean()

        ## 'name' is absent when its own field validation already failed
        name = cleaned_data.get('name')
        if name is not None and Reference.objects.filter(
                name__iexact=name, owner=self.request.user,
                is_obsolete=False, is_deleted=False).exists():
            self.add_error(
                'name',
                _("This name '" + name +
                  "' already exist in database, please choose other."))

        ## test reference_fasta
        if 'reference_fasta' not in cleaned_data:
            self.add_error('reference_fasta', _("Error: Must have a file."))
            return cleaned_data

        ### testing file names
        reference_fasta = cleaned_data['reference_fasta']
        ## optional field, also absent if its own validation failed
        reference_genbank = cleaned_data.get('reference_genbank')
        if (reference_genbank is not None
                and reference_fasta.name == reference_genbank.name):
            same_name_message = _(
                "Error: both files has the same name. Please, different files."
            )
            self.add_error('reference_fasta', same_name_message)
            self.add_error('reference_genbank', same_name_message)
            return cleaned_data

        temp_paths = []
        try:
            ## testing fasta
            some_error_in_files = False
            fasta_path = self._copy_upload_to_temp_file(
                reference_fasta, 'flu_fa_', temp_paths)
            try:
                number_locus = self.utils.is_fasta(fasta_path)
                self.request.session[
                    Constants.NUMBER_LOCUS_FASTA_FILE] = number_locus

                ## test the max numbers, only the first problem is reported
                if number_locus > Constants.MAX_SEQUENCES_FROM_FASTA:
                    self.add_error(
                        'reference_fasta',
                        _('Max allow number of sequences in fasta: {}'.format(
                            Constants.MAX_SEQUENCES_FROM_FASTA)))
                    some_error_in_files = True

                total_length_fasta = self.utils.get_total_length_fasta(
                    fasta_path)
                if (not some_error_in_files and total_length_fasta >
                        Constants.MAX_LENGTH_SEQUENCE_TOTAL_FROM_FASTA):
                    some_error_in_files = True
                    self.add_error(
                        'reference_fasta',
                        _('The length sum of the sequences in fasta: {}'.format(
                            Constants.MAX_LENGTH_SEQUENCE_TOTAL_FROM_FASTA)))

                n_seq_name_bigger_than = \
                    self.utils.get_number_seqs_names_bigger_than(
                        fasta_path, Constants.MAX_LENGTH_SEQ_NAME)
                if not some_error_in_files and n_seq_name_bigger_than > 0:
                    some_error_in_files = True
                    if n_seq_name_bigger_than == 1:
                        self.add_error(
                            'reference_fasta',
                            _('There is one sequence name length bigger than {0}. The max. length name is {0}. Prokka constrainments.'
                              .format(Constants.MAX_LENGTH_SEQ_NAME)))
                    else:
                        self.add_error(
                            'reference_fasta',
                            _('There are {0} sequences with name length bigger than {1}. The max. length name is {1}. Prokka constrainments.'
                              .format(n_seq_name_bigger_than,
                                      Constants.MAX_LENGTH_SEQ_NAME)))

                ## limits exceeded, there is no point in testing further
                if some_error_in_files:
                    return cleaned_data

                if not self.utils.test_sequences_same_length(fasta_path):
                    self.add_error(
                        'reference_fasta',
                        _('There are sequences that have not the same length. This produce errors for samtools faidx.'
                          ))
                    return cleaned_data
            except IOError as e:
                some_error_in_files = True
                self.add_error('reference_fasta', e.args[0])
            except Exception:
                some_error_in_files = True
                self.add_error('reference_fasta', "Not a valid 'fasta' file.")

            ### test if it has degenerated bases, only for a valid fasta
            if not some_error_in_files:
                try:
                    self.utils.has_degenerated_bases(fasta_path)
                except Exception as e:
                    some_error_in_files = True
                    self.add_error('reference_fasta', e.args[0])

            ### testing genbank, even if fasta failed, to report both errors
            genbank_path = None
            if reference_genbank is not None:
                genbank_path = self._copy_upload_to_temp_file(
                    reference_genbank, 'flu_gb_', temp_paths)
                try:
                    self.utils.is_genbank(genbank_path)
                except IOError as e:
                    some_error_in_files = True
                    self.add_error('reference_genbank', e.args[0])
                except Exception:
                    some_error_in_files = True
                    self.add_error('reference_genbank',
                                   "Not a valid 'genbank' file.")

            ## if some errors in the files, fasta or genBank, return
            if some_error_in_files:
                return cleaned_data

            ## test locus names and length of sequences
            if genbank_path is not None:
                try:
                    self.utils.compare_locus_fasta_gb(fasta_path,
                                                      genbank_path)
                except ValueError as e:
                    self.add_error('reference_fasta', e.args[0])
                    self.add_error('reference_genbank', e.args[0])
            return cleaned_data
        finally:
            ## remove temp files, also on the early returns above
            for temp_path in temp_paths:
                if os.path.exists(temp_path):
                    os.unlink(temp_path)
def upload_default_references(self, user, b_test):
    """
    upload default files for reference

    Every valid fasta file found in the references directory under
    STATIC_ROOT, and not yet registered for 'user', becomes a new
    Reference: the fasta is copied to MEDIA_ROOT, annotated with prokka,
    and the bed, index and fai files are created.
    When 'b_test' is set the loop stops after the third upload.
    """
    from managing_files.models import Reference
    from utils.software import Software

    software = Software()
    references_dir = os.path.join(getattr(settings, "STATIC_ROOT", None),
                                  Constants.DIR_TYPE_REFERENCES)
    n_upload = 0
    for file_name in self.utils.get_all_files(references_dir):
        fasta_path = os.path.join(references_dir, file_name)
        try:
            number_of_elements = self.utils.is_fasta(fasta_path)
        except IOError as e:
            print(e.args[0])
            continue

        fasta_base_name = os.path.basename(fasta_path)
        name = self.utils.clean_extension(fasta_base_name)
        try:
            Reference.objects.get(owner=user, is_obsolete=False,
                                  is_deleted=False, name__iexact=name)
        except Reference.DoesNotExist:
            pass
        else:
            ## already in database for this user, nothing to do
            continue

        reference = Reference()
        reference.display_name = name
        reference.isolate_name = name
        reference.name = reference.display_name
        reference.owner = user
        reference.is_obsolete = False
        reference.number_of_locus = number_of_elements
        reference.hash_reference_fasta = self.utils.md5sum(fasta_path)
        reference.reference_fasta_name = fasta_base_name
        reference.scentific_name = fasta_base_name
        reference.reference_genbank_name = name + FileExtensions.FILE_GBK
        ## first save, the id is needed to build the destination path
        reference.save()

        media_root = getattr(settings, "MEDIA_ROOT", None)
        relative_dir = self.utils.get_path_to_reference_file(
            user.id, reference.id)

        ## move the files to the right place
        relative_fasta = os.path.join(relative_dir,
                                      reference.reference_fasta_name)
        self.utils.copy_file(fasta_path,
                             os.path.join(media_root, relative_fasta))
        reference.reference_fasta.name = relative_fasta

        temp_dir = software.run_prokka(fasta_path, fasta_base_name)
        relative_genbank = os.path.join(relative_dir,
                                        reference.reference_genbank_name)
        genbank_path = os.path.join(media_root, relative_genbank)
        self.utils.move_file(
            os.path.join(temp_dir, reference.reference_genbank_name),
            genbank_path)
        reference.reference_genbank.name = relative_genbank
        reference.hash_reference_genbank = self.utils.md5sum(genbank_path)
        reference.save()

        ### create bed and index for genbank
        self.utils.from_genbank_to_bed(
            genbank_path, reference.get_reference_bed(TypePath.MEDIA_ROOT))
        software.create_index_files_from_igv_tools(
            reference.get_reference_bed(TypePath.MEDIA_ROOT))

        ### save in database the elements and coordinates
        self.utils.get_elements_from_db(reference, user)
        self.utils.get_elements_and_cds_from_db(reference, user)

        ## create the index before commit in database, throw exception if something goes wrong
        software.create_fai_fasta(
            os.path.join(media_root, reference.reference_fasta.name))

        ### remove dir
        self.utils.remove_dir(temp_dir)
        n_upload += 1
        if b_test and n_upload > 2:
            break
def handle(self, *args, **options):
    """
    Create the draft contigs, with spades, for the samples that do not
    have them yet.

    Only samples that are not deleted, are ready for projects and have
    the trimmomatic file are processed. For each one:
      1) if the last successful 'identify sample software' meta key has
         exactly two softwares, abricate is added to it and saved;
      2) if the draft contigs file does not exist, spades is run in a
         temporary directory, the contigs are identified and the two
         result files are copied to the sample.
    The temporary directory is always removed, even when a step fails.
    """
    utils = Utils()
    software = Software()
    software_names = SoftwareNames()
    for sample in Sample.objects.all():
        if sample.is_deleted:
            continue
        if not sample.is_ready_for_projects:
            continue
        if not os.path.exists(
                sample.get_trimmomatic_file(TypePath.MEDIA_ROOT, True)):
            print("Trimmomatic files does not exist: " + sample.name)
            continue

        manageDatabase = ManageDatabase()
        meta_sample = manageDatabase.get_sample_metakey_last(
            sample, MetaKeyAndValue.META_KEY_Identify_Sample_Software,
            MetaKeyAndValue.META_VALUE_Success)
        if meta_sample is not None:
            ## decode only after the None test, the description belongs
            ## to the meta key
            decodeResult = DecodeObjects()
            result_all = decodeResult.decode_result(meta_sample.description)
            if result_all.get_number_softwares() == 2:
                result_all.add_software(
                    SoftwareDesc(
                        software_names.get_abricate_name(),
                        software_names.get_abricate_version(),
                        software_names.get_abricate_parameters_mincov_30() +
                        " for segments/references assignment"))
                manageDatabase.set_sample_metakey(
                    sample, sample.owner,
                    MetaKeyAndValue.META_KEY_Identify_Sample_Software,
                    MetaKeyAndValue.META_VALUE_Success, result_all.to_json())
        else:
            print("There's no meta_sample for sample: " + sample.name)

        if os.path.exists(
                sample.get_draft_contigs_output(TypePath.MEDIA_ROOT)):
            print("Contigs already exists for this sample: " + sample.name)
            continue

        out_dir = utils.get_temp_dir()
        try:
            software.run_spades(
                sample.get_trimmomatic_file(TypePath.MEDIA_ROOT, True),
                sample.get_trimmomatic_file(TypePath.MEDIA_ROOT, False),
                out_dir)
            file_out = os.path.join(out_dir, "contigs.fasta")
            if os.path.exists(file_out):
                b_run_tests = False
                contigs_2_sequences = Contigs2Sequences(b_run_tests)
                (out_file_clean, clean_abricate_file
                 ) = contigs_2_sequences.identify_contigs(file_out)

                ## copy the contigs from spades
                utils.copy_file(
                    out_file_clean,
                    sample.get_draft_contigs_output(TypePath.MEDIA_ROOT))
                utils.copy_file(
                    clean_abricate_file,
                    sample.get_draft_contigs_abricate_output(
                        TypePath.MEDIA_ROOT))
                if os.path.exists(out_file_clean):
                    os.unlink(out_file_clean)
                if os.path.exists(clean_abricate_file):
                    os.unlink(clean_abricate_file)
        finally:
            ## never leave the spades working directory behind
            utils.remove_dir(out_dir)
def delete_project_samples(self, only_identify_files, lst_accounts_to_pass):
    """
    delete project samples if samples was deleted

    Looks at project samples flagged as deleted, not yet deleted in the
    file system, and whose sample is also deleted. For each one the
    snippy and freebayes output files are collected and, if it was
    deleted at least REMOVE_FILES_AFTER_DAYS days ago, removed.

    :param only_identify_files: when true nothing is removed and the
            database is not changed, the files are only listed
    :param lst_accounts_to_pass: user names whose project samples are skipped
    :return: number of files identified, or removed
    """
    self.out_message("\n### Project samples\n", False)
    count = 0
    software = Software()
    software_names = SoftwareNames()
    project_samples = ProjectSample.objects.all().filter(
        is_deleted=True,
        is_deleted_in_file_system=False,
        sample__is_deleted=True)
    for project_sample in project_samples:
        ## test the owner
        if project_sample.project.owner.username in lst_accounts_to_pass:
            continue

        files_removed = []
        files_to_remove = []
        ## can be removed already
        try:
            ### files from snippy and freebayes
            for software_name in (software_names.get_snippy_name(),
                                  software_names.get_freebayes_name()):
                for type_file in software.get_vect_type_files_to_copy(
                        software_name):
                    files_to_remove.append(
                        project_sample.get_file_output(
                            TypePath.MEDIA_ROOT, type_file, software_name))

            ## test the days removed
            if project_sample.date_deleted is not None:
                removed_days = (datetime.datetime.now() -
                                project_sample.date_deleted).days
            else:
                removed_days = 100000  ## big number, older versions doesn't have this table field

            if removed_days < self.REMOVE_FILES_AFTER_DAYS:
                ## the list can be empty, do not fail because of the message
                first_file = files_to_remove[0] if files_to_remove else None
                self.out_message(
                    "Not remove physically: {}; Deleted in web site {} days ago."
                    .format(first_file, removed_days), False)
                continue

            if only_identify_files:
                files_removed = files_to_remove.copy()
            else:
                for path_to_remove in files_to_remove:
                    if self.utils.remove_file(path_to_remove):
                        files_removed.append(path_to_remove)
                if files_removed:
                    ### save the flag in database, only when files were
                    ### physically removed
                    project_sample.is_deleted_in_file_system = True
                    project_sample.save()
        except Exception as e:
            self.out_message("Fail to remove: {}".format(str(e)), False)
            continue

        for file_path in files_removed:
            if file_path is None:
                continue
            if only_identify_files:
                self.out_message("Identified file: " + file_path, False)
            else:
                self.out_message("Remove file: " + file_path, False)
            count += 1

    ## summary, about project sample files and not uploaded fastq files
    if only_identify_files:
        if count == 0:
            self.out_message("None project sample files were identified",
                             False)
        else:
            self.out_message("Files identified: {}".format(count), False)
    else:
        if count == 0:
            self.out_message("None project sample files were removed", False)
        else:
            self.out_message("Files removed: {}".format(count), False)
    self.out_message("### END Project samples\n", False)
    return count