def query_prep(self):
    """
    Build one metadata object per query FASTA file and register it on the run metadata.

    Every '*.fasta' file directly inside self.query_path is handled in sorted order, except a file
    whose base name is 'combinedtargets', which is skipped. For each remaining file, a per-sample
    output directory (self.query_path/<name>) is created, any existing BLAST report at
    <output directory>/<name>.tsv is deleted, and the FASTA file is linked into the output directory
    with relative_symlink. The finished object is appended to self.runmetadata.samples.
    """
    logging.info('Preparing query files')
    # Sorting the glob results makes the processing order deterministic
    query_files = sorted(glob(os.path.join(self.query_path, '*.fasta')))
    for query_file in query_files:
        sample_name = os.path.splitext(os.path.basename(query_file))[0]
        # The combined targets file is not treated as a query sample
        if sample_name == 'combinedtargets':
            continue
        # Create the metadata object, and populate it with the required attributes
        sample = MetadataObject()
        sample.samples = list()
        sample.name = sample_name
        sample.general = GenObject()
        sample.commands = GenObject()
        sample.alleles = GenObject()
        # Each sample gets its own output directory, named after the sample, within the query path
        output_dir = os.path.join(self.query_path, sample_name)
        sample.alleles.outputdirectory = output_dir
        # The BLAST output file lives in the sample's output directory
        report = os.path.join(output_dir, '{seq_id}.tsv'.format(seq_id=sample_name))
        sample.alleles.blast_report = report
        # Delete any report already present at that path; a missing file is not an error
        try:
            os.remove(report)
        except FileNotFoundError:
            pass
        make_path(output_dir)
        # NOTE(review): with export_output=True, relative_symlink presumably returns the path of the
        # link it creates - confirm against the helper's implementation
        sample.general.bestassemblyfile = relative_symlink(
            src_file=query_file,
            output_dir=output_dir,
            export_output=True
        )
        # The object is stored in its own samples list as well as in the run-level list
        sample.samples.append(sample)
        self.runmetadata.samples.append(sample)
def test_sistr_seqsero():
    """
    Run method.sistr() and method.seqsero() on a single hand-built sample and check the results.

    The sample is constructed from NC_003198.fasta in var.sequencepath, with one seqsero FASTQ file
    set as its trimmed/corrected reads. It replaces the contents of method.runmetadata.samples.
    variable_update() is called once both sets of assertions have passed.
    """
    # Start from an empty sample list, so that only the sample built below is analysed
    sample_metadata = MetadataObject()
    method.runmetadata.samples = list()
    assembly = os.path.join(var.sequencepath, 'NC_003198.fasta')
    # The sample name is the file name up to the first '.'
    sample_metadata.name = os.path.basename(assembly).split('.')[0]
    # Initialise the general and run categories
    sample_metadata.general = GenObject()
    sample_metadata.run = GenObject()
    sample_metadata.general.fastqfiles = list()
    seqsero_reads = os.path.join(var.sequencepath, 'seqsero', '2014-SEQ-1049_seqsero.fastq.gz')
    sample_metadata.general.trimmedcorrectedfastqfiles = [seqsero_reads]
    # Create the destination folder, named after the sample
    output_dir = os.path.join(var.sequencepath, sample_metadata.name)
    make_path(output_dir)
    # Record the output directory and the log file paths in the metadata
    sample_metadata.general.outputdirectory = output_dir
    sample_metadata.general.logout = os.path.join(output_dir, 'out')
    sample_metadata.general.logerr = os.path.join(output_dir, 'err')
    sample_metadata.run.outputdirectory = output_dir
    # Placeholder value: replaced with the path of the FASTA file further down
    sample_metadata.general.bestassemblyfile = True
    # Initialise an attribute to store commands
    sample_metadata.commands = GenObject()
    # Assume that all samples are Salmonella
    sample_metadata.general.referencegenus = 'Salmonella'
    # Set the .fasta file as the best assembly
    sample_metadata.general.bestassemblyfile = assembly
    method.runmetadata.samples.append(sample_metadata)
    # Either of two cgMLST genome matches is accepted
    method.sistr()
    for analysed in method.runmetadata.samples:
        assert analysed.sistr.cgmlst_genome_match in ('ERR586739', 'SAL_BA2732AA')
    method.seqsero()
    for analysed in method.runmetadata.samples:
        assert analysed.seqsero.predicted_serotype == '- 9:f,g,t:-'
    variable_update()