Пример #1
0
def main():
    """
    Drive the script from start to finish.

    The stages run strictly one after another:
    1.  Parse the command line; when a DTO file was given, the parsed
        arguments are handed to the input-config parser and replaced by
        whatever it returns
    2.  Import the reference (FASTA plus dups file) into a ReferenceGenome
    3.  Load the query input files into a GenomeCollection, passing the
        requested thread count and the coverage/proportion minimums through
    4.  Write the output matrices
    5.  Write the stats files
    """
    options = _parse_args()
    if options.dto_file:
        options = _parse_input_config(options)

    logging.basicConfig(level=logging.WARNING)

    # NOTE(review): function-scope import -- presumably deliberate (e.g. to
    # defer loading nasp_objects until after argument parsing); confirm
    # before hoisting it to module level.
    from nasp.nasp_objects import ReferenceGenome, GenomeCollection

    ref_genome = ReferenceGenome()
    import_reference(
        ref_genome,
        options.reference_fasta,
        options.reference_dups,
    )

    collection = GenomeCollection()
    collection.set_reference(ref_genome)
    parse_input_files(
        options.input_files,
        options.num_threads,
        collection,
        options.minimum_coverage,
        options.minimum_proportion,
    )

    write_output_matrices(
        collection,
        options.matrix_folder,
        options.filter_matrix_format,
    )
    write_stats_data(collection, options.stats_folder)
Пример #2
0
    def setUp(self):
        """
        Build the fixture for the VCF matrix tests.

        Imports a one-contig reference from a throwaway FASTA file, fills a
        GenomeCollection with every genome read from the GATK VCF test data,
        and prepares a single 'vcf' matrix-format entry whose handle is a
        temp file that survives being closed.
        """
        ref_genome = ReferenceGenome()
        with tempfile.NamedTemporaryFile(mode='w+') as fasta_file:
            # Positions 5 and 6 may hold any base; every other position has
            # to agree with the REF column of the VCF, otherwise an exception
            # will be thrown.
            fasta_file.write('>500WT1_test\nCCTGGGGA')
            # Rewind before the file is re-opened by name below; presumably
            # this also pushes the buffered text out to disk.
            fasta_file.seek(0)
            ref_genome.import_fasta_file(fasta_file.name)

        collection = GenomeCollection()
        self.genome = collection
        from nasp.vcf_to_matrix import read_vcf_file
        # NOTE(review): 10 and 0.9 look like the minimum coverage and minimum
        # proportion thresholds -- confirm against read_vcf_file's signature.
        vcf_genomes = read_vcf_file(ref_genome, 10, 0.9, testdata.GATK_VCF)
        for sample in vcf_genomes:
            collection.add_genome(sample)
        collection.set_reference(ref_genome)

        # delete=False keeps the file on disk after it is closed.
        self.tmpfile = tempfile.NamedTemporaryFile(mode='w', delete=False)

        vcf_format = {
            'dataformat': 'vcf',
            'handle': self.tmpfile,
            'filter': '',
        }
        self.matrix_formats = [vcf_format]
Пример #3
0
    def setUp(self):
        """
        Build the fixture for the statistics tests.

        Imports the reference FASTA and dups test data into a
        ReferenceGenome, adds that same FASTA to a GenomeCollection as its
        only genome, runs the matrix writer with an empty format mapping,
        and opens a temp file that survives being closed.
        """
        fasta_path = testdata.REFERENCE_FASTA
        duplicates_path = testdata.REFERENCE_DUPS

        ref_genome = ReferenceGenome()
        ref_genome.import_fasta_file(fasta_path)
        ref_genome.import_dups_file(duplicates_path)

        collection = GenomeCollection()
        self.genome = collection

        # The reference FASTA doubles as the sole sample genome.
        sample = FastaGenome()
        sample.import_fasta_file(fasta_path)
        collection.add_genome(sample)
        collection.set_reference(ref_genome)

        # Statistics are only collected while matrices are being written, so
        # the writer is invoked here with an empty set of formats.
        collection.write_to_matrices({})

        # delete=False keeps the file on disk after it is closed.
        self.tmpfile = tempfile.NamedTemporaryFile(mode='w', delete=False)
Пример #4
0
    def setUp(self):
        """
        Build the fixture for the matrix-format tests.

        Imports the reference FASTA and dups test data into a
        ReferenceGenome, adds that same FASTA to a GenomeCollection as its
        only genome, and prepares a single 'matrix' format entry whose
        handle is a temp file that survives being closed.
        """
        fasta_path = testdata.REFERENCE_FASTA
        duplicates_path = testdata.REFERENCE_DUPS

        ref_genome = ReferenceGenome()
        ref_genome.import_fasta_file(fasta_path)
        ref_genome.import_dups_file(duplicates_path)

        collection = GenomeCollection()
        self.genome = collection

        # The reference FASTA doubles as the sole sample genome.
        sample = FastaGenome()
        sample.import_fasta_file(fasta_path)
        collection.add_genome(sample)
        collection.set_reference(ref_genome)

        # delete=False keeps the file on disk after it is closed.
        self.tmpfile = tempfile.NamedTemporaryFile(mode='w', delete=False)

        matrix_format = {
            'dataformat': 'matrix',
            'handle': self.tmpfile,
            'filter': '',
        }
        self.matrix_formats = [matrix_format]