def main():
    """
    Drive the script from argument parsing through to the stats output.

    The steps run strictly in this order:
        1. Parse the command line; when a ``dto_file`` was supplied, the
           parsed arguments are replaced by the result of
           ``_parse_input_config``.
        2. Load the reference (fasta plus dups) into a ``ReferenceGenome``.
        3. Read every input file into a ``GenomeCollection``, handing the
           requested thread count and the coverage/proportion minimums to
           ``parse_input_files``.
        4. Write the output matrices.
        5. Write the stats files.
    """
    args = _parse_args()
    if args.dto_file:
        args = _parse_input_config(args)

    logging.basicConfig(level=logging.WARNING)

    # Imported locally, after logging has been configured.
    from nasp.nasp_objects import ReferenceGenome, GenomeCollection

    reference = ReferenceGenome()
    import_reference(
        reference,
        args.reference_fasta,
        args.reference_dups,
    )

    genomes = GenomeCollection()
    genomes.set_reference(reference)
    parse_input_files(
        args.input_files,
        args.num_threads,
        genomes,
        args.minimum_coverage,
        args.minimum_proportion,
    )

    write_output_matrices(
        genomes,
        args.matrix_folder,
        args.filter_matrix_format,
    )
    write_stats_data(genomes, args.stats_folder)
def setUp(self):
    """
    Prepare a genome collection populated from the GATK test VCF.

    Sets on the test case:
        self.genome: GenomeCollection holding every genome yielded by
            ``read_vcf_file`` for ``testdata.GATK_VCF``, with the reference
            attached afterwards.
        self.tmpfile: writable temp file created with ``delete=False``, so
            it stays on disk after being closed.
        self.matrix_formats: a single 'vcf' format entry whose handle is
            ``self.tmpfile`` and whose filter is empty.
    """
    from nasp.vcf_to_matrix import read_vcf_file

    ref_genome = ReferenceGenome()
    with tempfile.NamedTemporaryFile(mode='w+') as fasta_file:
        # Positions 5 and 6 may hold any base; every other position has to
        # agree with the REF column of the VCF or an exception is thrown.
        fasta_file.write('>500WT1_test\nCCTGGGGA')
        # Rewinding pushes the buffered text to disk before the file is
        # re-read through its name.
        fasta_file.seek(0)
        ref_genome.import_fasta_file(fasta_file.name)

    collection = GenomeCollection()
    self.genome = collection
    vcf_genomes = read_vcf_file(ref_genome, 10, 0.9, testdata.GATK_VCF)
    for vcf_genome in vcf_genomes:
        collection.add_genome(vcf_genome)
    collection.set_reference(ref_genome)

    self.tmpfile = tempfile.NamedTemporaryFile(mode='w', delete=False)
    vcf_format = {
        'dataformat': 'vcf',
        'handle': self.tmpfile,
        'filter': '',
    }
    self.matrix_formats = [vcf_format]
def setUp(self):
    """
    Prepare a genome collection whose matrices have already been written.

    The reference fasta is loaded twice: once as the ``ReferenceGenome``
    (together with the dups file) and once as the only ``FastaGenome`` in
    the collection.

    Sets on the test case:
        self.genome: the populated GenomeCollection.
        self.tmpfile: writable temp file created with ``delete=False``, so
            it stays on disk after being closed.
    """
    fasta_path = testdata.REFERENCE_FASTA
    duplicates_path = testdata.REFERENCE_DUPS

    ref_genome = ReferenceGenome()
    ref_genome.import_fasta_file(fasta_path)
    ref_genome.import_dups_file(duplicates_path)

    collection = GenomeCollection()
    self.genome = collection

    sample = FastaGenome()
    sample.import_fasta_file(fasta_path)
    collection.add_genome(sample)
    collection.set_reference(ref_genome)

    # Statistics are collected as a by-product of creating the matrices,
    # so the write is triggered here with no output formats.
    collection.write_to_matrices({})

    self.tmpfile = tempfile.NamedTemporaryFile(mode='w', delete=False)
def setUp(self):
    """
    Prepare a genome collection and a 'matrix' output format for the tests.

    The reference fasta is loaded twice: once as the ``ReferenceGenome``
    (together with the dups file) and once as the only ``FastaGenome`` in
    the collection.

    Sets on the test case:
        self.genome: the populated GenomeCollection.
        self.tmpfile: writable temp file created with ``delete=False``, so
            it stays on disk after being closed.
        self.matrix_formats: a single 'matrix' format entry whose handle is
            ``self.tmpfile`` and whose filter is empty.
    """
    fasta_path = testdata.REFERENCE_FASTA
    duplicates_path = testdata.REFERENCE_DUPS

    ref_genome = ReferenceGenome()
    ref_genome.import_fasta_file(fasta_path)
    ref_genome.import_dups_file(duplicates_path)

    collection = GenomeCollection()
    self.genome = collection

    sample = FastaGenome()
    sample.import_fasta_file(fasta_path)
    collection.add_genome(sample)
    collection.set_reference(ref_genome)

    self.tmpfile = tempfile.NamedTemporaryFile(mode='w', delete=False)
    matrix_format = {
        'dataformat': 'matrix',
        'handle': self.tmpfile,
        'filter': '',
    }
    self.matrix_formats = [matrix_format]