예제 #1
0
    def handle(self, *args, **options):
        """
        Create the BED file and its IGV index for every reference lacking one.

        Every ``Reference`` is visited; a progress line with a running counter
        and the genbank file name is written to stdout and to both loggers.
        References whose BED file already exists on disk are skipped.
        """
        self.stdout.write("Starting")

        utils = Utils()
        software = Software()
        for count, reference in enumerate(Reference.objects.all(), start=1):
            bed_path = reference.get_reference_bed(TypePath.MEDIA_ROOT)

            ## same progress line goes to the console and to both loggers
            message = "{} {}: ".format(count,
                                       reference.reference_genbank_name)
            self.stdout.write(message)
            self.logger_production.info(message)
            self.logger_debug.info(message)

            if os.path.exists(bed_path):
                self.stdout.write("already exist\n")
                continue

            ### create bed and index for genbank
            utils.from_genbank_to_bed(
                reference.get_reference_gbk(TypePath.MEDIA_ROOT), bed_path)
            software.create_index_files_from_igv_tools(bed_path)
            self.stdout.write("created\n")

        self.stdout.write("End")
예제 #2
0
    def handle(self, *args, **options):
        """
        Run the second processing stage (snippy, coverage, freebayes) for one
        project sample.

        Reads ``project_sample_id`` and ``user_id`` from ``options``. When
        ``user_id`` is None the owner of the project is used instead. A
        missing project sample or user is reported on stdout.
        """
        software = Software()
        project_sample_id = options['project_sample_id']
        user_id = options['user_id']

        ## same start message goes to the console and to both loggers
        start_message = ("Starting for project_sample_id: " +
                         str(project_sample_id))
        self.stdout.write(start_message)
        self.logger_production.info(start_message)
        self.logger_debug.info(start_message)
        try:
            project_sample = ProjectSample.objects.get(pk=project_sample_id)
            if user_id is None:
                user = project_sample.project.owner
            else:
                user = User.objects.get(pk=user_id)
            software.process_second_stage_snippy_coverage_freebayes(
                project_sample, user)
            self.stdout.write("End")
        except ProjectSample.DoesNotExist:
            self.stdout.write(
                "Error: ProjectSample id '{}' does not exist.".format(
                    project_sample_id))
        except User.DoesNotExist:
            self.stdout.write(
                "Error: User id '{}' does not exist.".format(user_id))
    def handle(self, *args, **options):
        """
        Call ``Software.creat_new_reference_to_snippy`` for every project
        sample that is ready to be processed, in all projects not flagged as
        deleted. Progress is reported on stdout.
        """
        snippy_software = Software()
        for project in Project.objects.all():
            if not project.is_deleted:
                self.stdout.write("Processing project: {}".format(
                    project.name))
                ## lazy generator: readiness is tested right before each sample is handled
                ready_samples = (
                    candidate for candidate in project.project_samples.all()
                    if candidate.get_is_ready_to_proccess())
                for project_sample in ready_samples:
                    sample_name = project_sample.sample.name
                    self.stdout.write(
                        "Processing sample: {}".format(sample_name))
                    snippy_software.creat_new_reference_to_snippy(
                        project_sample)
        for closing_message in ("End", "Finished"):
            self.stdout.write(closing_message)
예제 #4
0
    def upload_default_files(self):
        """
        Upload the default file and create its abricate database if missing.

        The file to upload (version and path) is obtained from ``UploadFiles``
        in non-test mode. Nothing else is done when the upload returns None.
        """
        ## only runs once, at start; tests if the file was uploaded with virus hypothesis
        from manage_virus.uploadFiles import UploadFiles
        from utils.software import Software
        uploadFiles = UploadFiles()

        ## get version and path
        b_test = False
        (version, path) = uploadFiles.get_file_to_upload(b_test)

        ## upload
        uploadFile = uploadFiles.upload_file(version, path)
        if uploadFile is None:
            return

        ## create the abricate database, only when it does not exist yet
        software = Software()
        if not software.is_exist_database_abricate(uploadFile.abricate_name):
            software.create_database_abricate(uploadFile.abricate_name,
                                              uploadFile.path)
    def handle(self, *args, **options):
        """
        Call ``Software.run_fastq_and_trimmomatic_and_identify_species`` for
        one sample.

        Reads ``sample_id`` and ``user_id`` from ``options``. When ``user_id``
        is None the owner of the sample is used instead. The value returned by
        the run is written to stdout; a missing sample or user is reported on
        stdout.
        """
        software = Software()
        sample_id = options['sample_id']
        user_id = options['user_id']

        ## same start message goes to the console and to both loggers
        start_message = "Starting for sample_id: " + str(sample_id)
        self.stdout.write(start_message)
        self.logger_production.info(start_message)
        self.logger_debug.info(start_message)
        try:
            sample = Sample.objects.get(pk=sample_id)
            if user_id is None:
                user = sample.owner
            else:
                user = User.objects.get(pk=user_id)
            b_return = software.run_fastq_and_trimmomatic_and_identify_species(
                sample, user)
            self.stdout.write("Resulting: " + str(b_return))
            self.stdout.write("End")
        except Sample.DoesNotExist:
            self.stdout.write(
                "Error: Sample id '{}' does not exist.".format(sample_id))
        except User.DoesNotExist:
            self.stdout.write(
                "Error: User id '{}' does not exist.".format(user_id))
예제 #6
0
class ReferenceForm(forms.ModelForm):
    """
    Reference form: name, isolate name, fasta file and optional genbank file.

    The two uploaded files are validated together in ``clean`` because the
    genbank file has to be compared with the fasta file.
    """
    # helpers shared by all instances of the form
    utils = Utils()
    software = Software()
    error_css_class = 'error'

    class Meta:
        model = Reference
        # specify what fields should be used in this form.
        fields = ('name', 'isolate_name', 'reference_fasta',
                  'reference_genbank')

    def __init__(self, *args, **kwargs):
        """
        Build the form and its crispy-forms layout.

        The mandatory ``request`` keyword argument is popped from ``kwargs``
        (``KeyError`` when absent) and kept on the instance; ``clean`` reads
        the user and the session from it.
        """
        self.request = kwargs.pop('request')
        super(ReferenceForm, self).__init__(*args, **kwargs)

        ## can exclude explicitly
        ## exclude = ('md5',)
        field_text = [
            # (field_name, Field title label, Detailed field description, required)
            ('name', 'Name', 'Regular name for this reference', True),
            ('isolate_name', 'Isolate name', 'Isolate name for this reference',
             False),
            ('reference_fasta', 'Reference (fasta)',
             'Reference file in fasta format', True),
            ('reference_genbank', 'Reference (genBank)',
             """Reference file in genBank format.<br>
					Locus designations in the multi-GenBank file must have the same name as in the respective fasta file.<br>
					If you do not upload a Genbank file, INSaFLU will annotate the upload fasta file for you""",
             False),
        ]
        for field_name, label, help_text, required in field_text:
            field = self.fields[field_name]
            field.label = label
            field.help_text = help_text
            field.required = required

        ## in case you have to undo to specific ID
        ##		cancel_url = reverse('references')
        ##		if self.instance: cancel_url = reverse('references', kwargs={'pk': self.instance.id})

        self.helper = FormHelper()
        self.helper.form_method = 'POST'
        self.helper.layout = Layout(
            Div(Div('name', css_class="col-sm-3"),
                Div('isolate_name', css_class="col-sm-7"),
                css_class='row'),
            Div('reference_fasta', css_class='show-for-sr'),
            Div('reference_genbank', css_class='show-for-sr'),
            ButtonHolder(
                Submit('save', 'Save', css_class='btn-primary'),
                Button('cancel',
                       'Cancel',
                       css_class='btn-secondary',
                       onclick='window.location.href="{}"'.format(
                           reverse('references')))))

    def clean(self):
        """
        Clean all together because it's necessary to compare the genbank and fasta files.

        Checks that the name is not already used by a live reference of the
        requesting user, that the two uploads have different file names, and
        then validates the content of the files through temporary copies.
        The temporary copies are always removed before returning, also when
        validation stops early or an unexpected exception propagates.

        Returns the cleaned data; problems are attached with ``add_error``.
        """
        cleaned_data = super(ReferenceForm, self).clean()

        ## 'name' is absent from cleaned_data when its own validation failed
        name = cleaned_data.get('name')
        ## exists() instead of get(): duplicates must not raise MultipleObjectsReturned
        if name is not None and Reference.objects.filter(
                name__iexact=name,
                owner=self.request.user,
                is_obsolete=False,
                is_deleted=False).exists():
            self.add_error(
                'name',
                _("This name '{}' already exist in database, please choose other."
                  ).format(name))

        ## test reference_fasta
        if 'reference_fasta' not in cleaned_data:
            self.add_error('reference_fasta', _("Error: Must have a file."))
            return cleaned_data

        ### testing file names
        reference_fasta = cleaned_data['reference_fasta']
        ## absent when the genbank field failed its own validation; handled as "no genbank"
        reference_genbank = cleaned_data.get('reference_genbank')
        if (reference_genbank is not None
                and reference_fasta.name == reference_genbank.name):
            same_name_message = _(
                "Error: both files has the same name. Please, different files."
            )
            self.add_error('reference_fasta', same_name_message)
            self.add_error('reference_genbank', same_name_message)
            return cleaned_data

        ## every temporary copy is registered here and removed in the finally
        temp_paths = []
        try:
            self._check_reference_files(reference_fasta, reference_genbank,
                                        temp_paths)
        finally:
            for temp_path in temp_paths:
                if os.path.exists(temp_path):
                    os.unlink(temp_path)
        return cleaned_data

    def _write_upload_to_temp_file(self, uploaded_file, prefix, temp_paths):
        """
        Copy an uploaded file to a temporary file and pass it through
        ``dos_2_unix``.

        The path is appended to ``temp_paths`` as soon as the file exists so
        the caller can remove it whatever happens afterwards.
        Returns the path of the temporary file.
        """
        with NamedTemporaryFile(prefix=prefix, delete=False) as temp_file:
            temp_paths.append(temp_file.name)
            temp_file.write(uploaded_file.read())
        self.software.dos_2_unix(temp_file.name)
        return temp_file.name

    def _check_reference_files(self, reference_fasta, reference_genbank,
                               temp_paths):
        """
        Validate the content of the fasta upload and, when given, of the
        genbank upload; errors are attached to the form with ``add_error``.

        Temporary copies created here are registered in ``temp_paths``; the
        caller is responsible for removing them.
        """
        ## testing fasta
        fasta_path = self._write_upload_to_temp_file(reference_fasta,
                                                     'flu_fa_', temp_paths)
        some_error_in_files = False
        fasta_was_read = True
        try:
            number_locus = self.utils.is_fasta(fasta_path)
            self.request.session[
                Constants.NUMBER_LOCUS_FASTA_FILE] = number_locus

            ## test the max numbers
            if number_locus > Constants.MAX_SEQUENCES_FROM_FASTA:
                self.add_error(
                    'reference_fasta',
                    _('Max allow number of sequences in fasta: {}').format(
                        Constants.MAX_SEQUENCES_FROM_FASTA))
                some_error_in_files = True

            total_length_fasta = self.utils.get_total_length_fasta(fasta_path)
            if (not some_error_in_files and total_length_fasta >
                    Constants.MAX_LENGTH_SEQUENCE_TOTAL_FROM_FASTA):
                some_error_in_files = True
                self.add_error(
                    'reference_fasta',
                    _('The length sum of the sequences in fasta: {}').format(
                        Constants.MAX_LENGTH_SEQUENCE_TOTAL_FROM_FASTA))

            n_seq_name_bigger_than = self.utils.get_number_seqs_names_bigger_than(
                fasta_path, Constants.MAX_LENGTH_SEQ_NAME)
            if not some_error_in_files and n_seq_name_bigger_than > 0:
                if n_seq_name_bigger_than == 1:
                    self.add_error(
                        'reference_fasta',
                        _('There is one sequence name length bigger than {0}. The max. length name is {0}. Prokka constrainments.'
                          ).format(Constants.MAX_LENGTH_SEQ_NAME))
                else:
                    self.add_error(
                        'reference_fasta',
                        _('There are {0} sequences with name length bigger than {1}. The max. length name is {1}. Prokka constrainments.'
                          ).format(n_seq_name_bigger_than,
                                   Constants.MAX_LENGTH_SEQ_NAME))
                ## too long names stop the validation here
                return

            if not self.utils.test_sequences_same_length(fasta_path):
                self.add_error(
                    'reference_fasta',
                    _('There are sequences that have not the same length. This produce errors for samtools faidx.'
                      ))
                return

        except IOError as e:  ## (e.errno, e.strerror)
            fasta_was_read = False
            some_error_in_files = True
            self.add_error('reference_fasta', e.args[0])
        except Exception:
            fasta_was_read = False
            some_error_in_files = True
            self.add_error('reference_fasta', "Not a valid 'fasta' file.")

        ### test if it has degenerated bases, only when the fasta could be read
        if fasta_was_read:
            try:
                self.utils.has_degenerated_bases(fasta_path)
            except Exception as e:
                some_error_in_files = True
                self.add_error('reference_fasta', e.args[0])

        ### testing genbank
        genbank_path = None
        if reference_genbank is not None:
            genbank_path = self._write_upload_to_temp_file(
                reference_genbank, 'flu_gb_', temp_paths)
            try:
                self.utils.is_genbank(genbank_path)
            except IOError as e:
                some_error_in_files = True
                self.add_error('reference_genbank', e.args[0])
            except Exception:
                some_error_in_files = True
                self.add_error('reference_genbank',
                               "Not a valid 'genbank' file.")

        ## if some errors in the files, fasta or genBank, return
        if some_error_in_files or genbank_path is None:
            return

        ## test locus names and length of sequences
        try:
            self.utils.compare_locus_fasta_gb(fasta_path, genbank_path)
        except ValueError as e:
            self.add_error('reference_fasta', e.args[0])
            self.add_error('reference_genbank', e.args[0])
예제 #7
0
	def upload_default_references(self, user, b_test):
		"""
		Upload default files for reference.

		Every fasta file found in STATIC_ROOT/<Constants.DIR_TYPE_REFERENCES>
		that has no live reference with the same name for ``user`` is
		registered: the fasta is copied to MEDIA_ROOT, prokka is run to obtain
		the genbank file, then the bed, its index, the elements in the
		database and the fasta index are created.

		:param user: owner of the references to create
		:param b_test: when True the upload stops once more than two references were created
		"""
		from managing_files.models import Reference
		from utils.software import Software
		
		software = Software()
		media_root = getattr(settings, "MEDIA_ROOT", None)
		path_to_find = os.path.join(getattr(settings, "STATIC_ROOT", None), Constants.DIR_TYPE_REFERENCES)
		n_upload = 0
		for file_name in self.utils.get_all_files(path_to_find):
			file_path = os.path.join(path_to_find, file_name)
			try:
				number_of_elements = self.utils.is_fasta(file_path)
			except IOError as e:
				print(e.args[0])
				continue
			
			name = self.utils.clean_extension(os.path.basename(file_path))
			## exists() instead of get(): duplicated names must not raise MultipleObjectsReturned
			if Reference.objects.filter(owner=user, is_obsolete=False, is_deleted=False, name__iexact=name).exists():
				continue
			
			reference = Reference()
			reference.display_name = name
			reference.isolate_name = name
			reference.name = reference.display_name
			reference.owner = user
			reference.is_obsolete = False
			reference.number_of_locus = number_of_elements
			reference.hash_reference_fasta = self.utils.md5sum(file_path)
			reference.reference_fasta_name = os.path.basename(file_path)
			reference.scentific_name = os.path.basename(file_path)
			reference.reference_genbank_name = name + FileExtensions.FILE_GBK
			## first save, the reference id is needed to build the paths
			reference.save()
			
			## move the files to the right place
			relative_dir = self.utils.get_path_to_reference_file(user.id, reference.id)
			sz_file_to = os.path.join(media_root, relative_dir, reference.reference_fasta_name)
			self.utils.copy_file(file_path, sz_file_to)
			reference.reference_fasta.name = os.path.join(relative_dir, reference.reference_fasta_name)
			
			temp_dir = software.run_prokka(file_path, os.path.basename(file_path))
			try:
				sz_file_to = os.path.join(media_root, relative_dir, reference.reference_genbank_name)
				self.utils.move_file(os.path.join(temp_dir, reference.reference_genbank_name), sz_file_to)
				reference.reference_genbank.name = os.path.join(relative_dir, reference.reference_genbank_name)
				reference.hash_reference_genbank = self.utils.md5sum(sz_file_to)
				reference.save()
				
				### create bed and index for genbank
				bed_path = reference.get_reference_bed(TypePath.MEDIA_ROOT)
				self.utils.from_genbank_to_bed(sz_file_to, bed_path)
				software.create_index_files_from_igv_tools(bed_path)
				
				### save in database the elements and coordinates
				self.utils.get_elements_from_db(reference, user)
				self.utils.get_elements_and_cds_from_db(reference, user)
				
				## create the fasta index, throw exception if something goes wrong
				software.create_fai_fasta(os.path.join(media_root, reference.reference_fasta.name))
			finally:
				### remove dir, also when one of the steps above fails
				self.utils.remove_dir(temp_dir)
			n_upload += 1
			
			if (b_test and n_upload > 2): break
예제 #8
0
    def handle(self, *args, **options):
        """
        Create the draft contigs (spades) for the samples that do not have
        them yet.

        Deleted samples, samples not ready for projects and samples without
        the trimmomatic file are skipped. When the last successful
        "identify sample software" meta key lists exactly two softwares, the
        abricate description is appended to it and the meta key is saved
        again. Samples that already have the contigs file are then skipped.
        """
        utils = Utils()
        software = Software()
        software_names = SoftwareNames()
        for sample in Sample.objects.all():
            if sample.is_deleted:
                continue
            if not sample.is_ready_for_projects:
                continue

            if not os.path.exists(
                    sample.get_trimmomatic_file(TypePath.MEDIA_ROOT, True)):
                print("Trimmomatic files does not exist: " + sample.name)
                continue

            manageDatabase = ManageDatabase()
            meta_sample = manageDatabase.get_sample_metakey_last(
                sample, MetaKeyAndValue.META_KEY_Identify_Sample_Software,
                MetaKeyAndValue.META_VALUE_Success)

            ## decode only after the None test, otherwise a missing meta key raises AttributeError
            if meta_sample is None:
                print("There's no meta_sample for sample: " + sample.name)
            else:
                result_all = DecodeObjects().decode_result(
                    meta_sample.description)
                if result_all.get_number_softwares() == 2:
                    result_all.add_software(
                        SoftwareDesc(
                            software_names.get_abricate_name(),
                            software_names.get_abricate_version(),
                            software_names.get_abricate_parameters_mincov_30()
                            + " for segments/references assignment"))
                    manageDatabase.set_sample_metakey(
                        sample, sample.owner,
                        MetaKeyAndValue.META_KEY_Identify_Sample_Software,
                        MetaKeyAndValue.META_VALUE_Success,
                        result_all.to_json())

            if os.path.exists(
                    sample.get_draft_contigs_output(TypePath.MEDIA_ROOT)):
                print("Contigs already exists for this sample: " + sample.name)
                continue

            out_dir = utils.get_temp_dir()
            try:
                software.run_spades(
                    sample.get_trimmomatic_file(TypePath.MEDIA_ROOT, True),
                    sample.get_trimmomatic_file(TypePath.MEDIA_ROOT, False),
                    out_dir)
                file_out = os.path.join(out_dir, "contigs.fasta")

                if os.path.exists(file_out):
                    b_run_tests = False
                    contigs_2_sequences = Contigs2Sequences(b_run_tests)
                    (out_file_clean, clean_abricate_file
                     ) = contigs_2_sequences.identify_contigs(file_out)
                    try:
                        ## copy the contigs from spades
                        utils.copy_file(
                            out_file_clean,
                            sample.get_draft_contigs_output(
                                TypePath.MEDIA_ROOT))
                        utils.copy_file(
                            clean_abricate_file,
                            sample.get_draft_contigs_abricate_output(
                                TypePath.MEDIA_ROOT))
                    finally:
                        ## intermediate files are removed even if a copy fails
                        for temp_file in (out_file_clean,
                                          clean_abricate_file):
                            if os.path.exists(temp_file):
                                os.unlink(temp_file)
            finally:
                ## the temp dir is always removed, also when spades fails
                utils.remove_dir(out_dir)
예제 #9
0
    def delete_project_samples(self, only_identify_files,
                               lst_accounts_to_pass):
        """
        Delete the files of project samples if the samples were deleted.

        Handles the project samples flagged as deleted, not yet deleted in the
        file system, and whose sample is deleted too. The snippy and freebayes
        output files are removed once the project sample has been deleted for
        at least ``self.REMOVE_FILES_AFTER_DAYS`` days.

        :param only_identify_files: when true nothing is removed, the files are only listed
        :param lst_accounts_to_pass: user names whose project samples are skipped
        :return: number of files identified or removed
        """
        self.out_message("\n### Project samples\n", False)
        count = 0

        software = Software()
        software_names = SoftwareNames()
        project_samples = ProjectSample.objects.filter(
            is_deleted=True,
            is_deleted_in_file_system=False,
            sample__is_deleted=True)
        for project_sample in project_samples:
            ## test the owner
            if project_sample.project.owner.username in lst_accounts_to_pass:
                continue

            files_removed = []
            try:
                ### files from snippy, then from freebayes
                files_to_remove = [
                    project_sample.get_file_output(TypePath.MEDIA_ROOT,
                                                   type_file, software_name)
                    for software_name in (software_names.get_snippy_name(),
                                          software_names.get_freebayes_name())
                    for type_file in software.get_vect_type_files_to_copy(
                        software_name)
                ]

                ## test the days removed
                if project_sample.date_deleted is not None:
                    removed_days = (datetime.datetime.now() -
                                    project_sample.date_deleted).days
                else:
                    removed_days = 100000  ## big number, older versions doesn't have this table field
                if removed_days < self.REMOVE_FILES_AFTER_DAYS:
                    ## an empty file list must not be reported as a removal failure
                    if files_to_remove:
                        first_file = files_to_remove[0]
                    else:
                        first_file = "project sample id {}".format(
                            project_sample.pk)
                    self.out_message(
                        "Not remove physically: {}; Deleted in web site {} days ago."
                        .format(first_file, removed_days), False)
                    continue

                if only_identify_files:
                    files_removed = list(files_to_remove)
                else:
                    files_removed = [
                        path_to_remove for path_to_remove in files_to_remove
                        if self.utils.remove_file(path_to_remove)
                    ]
                    if files_removed:
                        ### save the flag in database
                        project_sample.is_deleted_in_file_system = True
                        project_sample.save()

            except Exception as e:
                self.out_message("Fail to remove: {}".format(str(e)), False)
                continue

            for file_path in files_removed:
                if file_path is None:
                    continue
                if only_identify_files:
                    self.out_message("Identified file: " + file_path, False)
                else:
                    self.out_message("Remove file: " + file_path, False)
                count += 1

        ## summary; the wording now refers to project samples
        if only_identify_files:
            if count == 0:
                self.out_message("No project sample files were identified",
                                 False)
            else:
                self.out_message("Files identified: {}".format(count), False)
        else:
            if count == 0:
                self.out_message("No project sample files were removed",
                                 False)
            else:
                self.out_message("Files removed: {}".format(count), False)
        self.out_message("### END Project samples\n", False)
        return count