示例#1
0
 def setup(self):
     """Store the reference path/name and build the amino-acid converter.

     Sets ``self.reference_filepath`` to the NC_000962.3 FASTA path,
     ``self.reference`` to that file's base name truncated at the first
     ``.fa`` (``"NC_000962.3"`` for this path), and ``self.aa2dna`` to a
     ``GeneAminoAcidChangeToDNAVariants`` built from the FASTA and the
     matching GenBank file.
     """
     fasta_path = "src/mykrobe/data/NC_000962.3.fasta"
     genbank_path = "src/mykrobe/data/NC_000962.3.gb"

     self.reference_filepath = fasta_path
     # Everything before the first ".fa" in the file name is the reference name.
     fasta_name = os.path.basename(fasta_path)
     self.reference = fasta_name.split('.fa')[0]
     self.aa2dna = GeneAminoAcidChangeToDNAVariants(fasta_path, genbank_path)
示例#2
0
 def test_gene_muts(self):
     """Check the DNA variant names generated for rpoB D3A and D3X.

     The converter is also stored on ``self.gm``. Variant names have the
     form ``<ref codon><position><alt codon>``; results are compared after
     sorting, so the order returned by the converter does not matter.
     The 'mykrobe-test' database is dropped once all assertions pass.
     """
     converter = GeneAminoAcidChangeToDNAVariants(
         reference="src/mykrobe/data/NC_000962.3.fasta",
         genbank="src/mykrobe/data/NC_000962.3.gb")
     self.gm = converter

     lysine_codons = converter.get_alts("K")
     assert lysine_codons == ['AAA', 'AAG']

     # D3A: reference codon GAT (positions 759813-759815) replaced by each
     # alanine codon GCA / GCT / GCC / GCG.
     d3a_expected = [
         'GAT759813GCA', 'GAT759813GCT', 'GAT759813GCC', 'GAT759813GCG',
     ]
     d3a_names = converter.get_variant_names("rpoB", "D3A")
     assert sorted(d3a_names) == sorted(d3a_expected)

     # D3X: same reference codon, but one name per alt codon of every amino
     # acid other than D -- 59 names, none of them using GAT or GAC.
     d3x_expected = [
         # A
         'GAT759813GCA', 'GAT759813GCT', 'GAT759813GCC', 'GAT759813GCG',
         # C
         'GAT759813TGT', 'GAT759813TGC',
         # E
         'GAT759813GAA', 'GAT759813GAG',
         # G
         'GAT759813GGA', 'GAT759813GGT', 'GAT759813GGC', 'GAT759813GGG',
         # F
         'GAT759813TTT', 'GAT759813TTC',
         # I
         'GAT759813ATA', 'GAT759813ATT', 'GAT759813ATC',
         # H
         'GAT759813CAT', 'GAT759813CAC',
         # K
         'GAT759813AAA', 'GAT759813AAG',
         # M
         'GAT759813ATG',
         # L
         'GAT759813TTA', 'GAT759813TTG', 'GAT759813CTA', 'GAT759813CTT',
         'GAT759813CTC', 'GAT759813CTG',
         # N
         'GAT759813AAT', 'GAT759813AAC',
         # Q
         'GAT759813CAA', 'GAT759813CAG',
         # P
         'GAT759813CCA', 'GAT759813CCT', 'GAT759813CCC', 'GAT759813CCG',
         # S
         'GAT759813AGT', 'GAT759813AGC', 'GAT759813TCA', 'GAT759813TCT',
         'GAT759813TCC', 'GAT759813TCG',
         # R
         'GAT759813AGA', 'GAT759813AGG', 'GAT759813CGA', 'GAT759813CGT',
         'GAT759813CGC', 'GAT759813CGG',
         # T
         'GAT759813ACA', 'GAT759813ACT', 'GAT759813ACC', 'GAT759813ACG',
         # W
         'GAT759813TGG',
         # V
         'GAT759813GTA', 'GAT759813GTT', 'GAT759813GTC', 'GAT759813GTG',
         # Y
         'GAT759813TAT', 'GAT759813TAC',
     ]
     d3x_names = converter.get_variant_names("rpoB", "D3X")
     assert sorted(d3x_names) == sorted(d3x_expected)

     DB.drop_database('mykrobe-test')
示例#3
0
 def test_gene_muts2(self):
     """Check the DNA variant names generated for katG E3A.

     The converter is also stored on ``self.gm``. The 'mykrobe-test'
     database is dropped once all assertions pass.
     """
     converter = GeneAminoAcidChangeToDNAVariants(
         reference=f"{DATA_DIR}/NC_000962.3.fasta",
         genbank=f"{DATA_DIR}/NC_000962.3.gb")
     self.gm = converter
     assert converter.get_alts("K") == ['AAA', 'AAG']

     # In coding orientation E3A is GAG -> GCA / GCT / GCC / GCG. The expected
     # names are the reverse complement of that change, CTC -> TGC / AGC /
     # GGC / CGC at position 2156103 (presumably because katG is annotated on
     # the reverse strand -- confirm against the GenBank file).
     expected_names = [
         'CTC2156103TGC', 'CTC2156103AGC', 'CTC2156103GGC', 'CTC2156103CGC',
     ]
     observed_names = converter.get_variant_names("katG", "E3A")
     assert sorted(observed_names) == sorted(expected_names)

     DB.drop_database('mykrobe-test')
示例#4
0
    def setup(self):
        """Reset the test database and create the fixtures used by the tests.

        After dropping 'mykrobe-test', this loads the sequence of the first
        record in the NC_000962.3 FASTA file into ``self.reference_seq``,
        builds the amino-acid converter (``self.gm``) and saves a reference
        set, a variant set attached to it, and a reference belonging to it.
        """
        fasta_path = "src/mykrobe/data/NC_000962.3.fasta"
        genbank_path = "src/mykrobe/data/NC_000962.3.gb"

        # Start from an empty database so documents from earlier runs cannot
        # interfere with the ones created below.
        DB.drop_database('mykrobe-test')

        with open(fasta_path, 'r') as handle:
            fasta_records = list(SeqIO.parse(handle, "fasta"))
        self.reference_seq = fasta_records[0].seq

        self.gm = GeneAminoAcidChangeToDNAVariants(
            reference=fasta_path, genbank=genbank_path)

        reference_set = ReferenceSet().create_and_save(name="ref_set")
        self.reference_set = reference_set

        variant_set = VariantSet.create_and_save(
            name="this_vcf_file", reference_set=reference_set)
        self.variant_set = variant_set
        self.variant_sets = [variant_set]

        self.reference_id = Reference().create_and_save(
            name="ref", md5checksum="sre", reference_sets=[reference_set])
示例#5
0
 def test_gene_muts(self):
     """Check the DNA variant names generated for rpoB D3A and D3X.

     The converter is also stored on ``self.gm``. Variant names have the
     form ``<ref codon><position><alt codon>`` and are compared after
     sorting. The "mykrobe-test" database is dropped once all assertions
     pass.
     """
     converter = GeneAminoAcidChangeToDNAVariants(
         reference=f"{DATA_DIR}/NC_000962.3.fasta",
         genbank=f"{DATA_DIR}/NC_000962.3.gb",
     )
     self.gm = converter
     assert converter.get_alts("K") == ["AAA", "AAG"]

     # D3A: reference codon GAT (positions 759813-759815) replaced by each
     # alanine codon.
     d3a_expected = [
         "GAT759813GCA", "GAT759813GCT", "GAT759813GCC", "GAT759813GCG",
     ]
     d3a_names = converter.get_variant_names("rpoB", "D3A")
     assert sorted(d3a_names) == sorted(d3a_expected)

     # D3X: expected names keyed by the one-letter code of the amino acid the
     # alt codon encodes. Every amino acid except D appears (59 names total).
     d3x_expected_by_residue = {
         "A": ["GAT759813GCA", "GAT759813GCT", "GAT759813GCC", "GAT759813GCG"],
         "C": ["GAT759813TGT", "GAT759813TGC"],
         "E": ["GAT759813GAA", "GAT759813GAG"],
         "G": ["GAT759813GGA", "GAT759813GGT", "GAT759813GGC", "GAT759813GGG"],
         "F": ["GAT759813TTT", "GAT759813TTC"],
         "I": ["GAT759813ATA", "GAT759813ATT", "GAT759813ATC"],
         "H": ["GAT759813CAT", "GAT759813CAC"],
         "K": ["GAT759813AAA", "GAT759813AAG"],
         "M": ["GAT759813ATG"],
         "L": [
             "GAT759813TTA", "GAT759813TTG", "GAT759813CTA",
             "GAT759813CTT", "GAT759813CTC", "GAT759813CTG",
         ],
         "N": ["GAT759813AAT", "GAT759813AAC"],
         "Q": ["GAT759813CAA", "GAT759813CAG"],
         "P": ["GAT759813CCA", "GAT759813CCT", "GAT759813CCC", "GAT759813CCG"],
         "S": [
             "GAT759813AGT", "GAT759813AGC", "GAT759813TCA",
             "GAT759813TCT", "GAT759813TCC", "GAT759813TCG",
         ],
         "R": [
             "GAT759813AGA", "GAT759813AGG", "GAT759813CGA",
             "GAT759813CGT", "GAT759813CGC", "GAT759813CGG",
         ],
         "T": ["GAT759813ACA", "GAT759813ACT", "GAT759813ACC", "GAT759813ACG"],
         "W": ["GAT759813TGG"],
         "V": ["GAT759813GTA", "GAT759813GTT", "GAT759813GTC", "GAT759813GTG"],
         "Y": ["GAT759813TAT", "GAT759813TAC"],
     }
     d3x_expected = [
         name
         for residue_names in d3x_expected_by_residue.values()
         for name in residue_names
     ]
     d3x_names = converter.get_variant_names("rpoB", "D3X")
     assert sorted(d3x_names) == sorted(d3x_expected)

     DB.drop_database("mykrobe-test")
示例#6
0
def run(parser, args):
    """Build variant probes and write them to stdout as FASTA records.

    Mutations are collected from one of several sources, chosen in this
    order:

    * ``args.vcf`` set: ``run_make_probes_from_vcf_file(args)`` is called and
      no mutations are collected here.
    * ``args.genbank`` set: gene-level changes are expanded to DNA variant
      names with ``GeneAminoAcidChangeToDNAVariants``. They come from
      ``args.text_file`` (tab-separated ``gene, mutation, alphabet`` rows) or,
      without a text file, from ``args.variants`` (``"<gene>_<mutation>"``).
    * otherwise: DNA variants are read from ``args.text_file`` (tab-separated
      ``gene_name, pos, ref, alt, alphabet`` rows) or taken verbatim from
      ``args.variants``.

    For every mutation a probe panel is requested from ``make_variant_probe``
    and each ref/alt allele is written as a header line followed by the
    sequence. Mutations with no panel are logged as warnings.

    Args:
        parser: Unused by this function.
        args: Parsed arguments; the attributes read are ``db_name``,
            ``reference_filepath``, ``vcf``, ``genbank``, ``text_file``,
            ``variants``, ``kmer`` and ``no_backgrounds``.
    """
    DB = connect('mykrobe-%s' % (args.db_name))
    if DB is not None:
        try:
            # A ServerSelectionTimeoutError raised here is treated as "no
            # database available": probes are then built without DB.
            Variant.objects()
            logger.info("Connected to mykrobe-%s", args.db_name)
        except ServerSelectionTimeoutError:
            DB = None
            logger.warning(
                "Could not connect to database. Continuing without using genetic backgrounds")

    mutations = []
    reference = os.path.basename(args.reference_filepath).split('.fa')[0]
    if args.vcf:
        run_make_probes_from_vcf_file(args)
    elif args.genbank:
        aa2dna = GeneAminoAcidChangeToDNAVariants(
            args.reference_filepath,
            args.genbank)
        if args.text_file:
            with open(args.text_file, 'r') as infile:
                reader = csv.reader(infile, delimiter="\t")
                for row in reader:
                    if not row:
                        # csv.reader yields [] for blank lines; skip them
                        # instead of failing on the unpacking below.
                        continue
                    gene, mutation_string, alphabet = row
                    # Anything not flagged as "DNA" is a protein-level change.
                    protein_coding_var = alphabet != "DNA"
                    for var_name in aa2dna.get_variant_names(
                            gene, mutation_string, protein_coding_var):
                        mutations.append(
                            Mutation(reference=reference,
                                     var_name=var_name,
                                     gene=aa2dna.get_gene(gene),
                                     mut=mutation_string))
        else:
            for variant in args.variants:
                gene, mutation = variant.split("_")
                for var_name in aa2dna.get_variant_names(gene, mutation):
                    mutations.append(
                        Mutation(reference=reference,
                                 var_name=var_name,
                                 gene=gene,
                                 mut=mutation))
    else:
        if args.text_file:
            with open(args.text_file, 'r') as infile:
                reader = csv.reader(infile, delimiter="\t")
                for row in reader:
                    if not row:
                        # Blank line in the TSV file.
                        continue
                    gene_name, pos, ref, alt, alphabet = row
                    if gene_name == "ref":
                        var_name = "".join([ref, pos, alt])
                    else:
                        var_name = row[0]
                    mutations.append(
                        Mutation(reference=reference, var_name=var_name))
        else:
            mutations.extend(Mutation(reference=reference, var_name=v)
                             for v in args.variants)

    al = AlleleGenerator(
        reference_filepath=args.reference_filepath,
        kmer=args.kmer)
    total = len(mutations)
    for enum, mut in enumerate(mutations):
        if enum % 100 == 0:
            # Progress report; the percentage is shown with two decimals.
            logger.info(
                "%i of %i - %.2f%%", enum, total, 100 * enum / total)
        variant_panel = make_variant_probe(
            al, mut.variant, args.kmer, DB=DB, no_backgrounds=args.no_backgrounds)
        if variant_panel is None:
            logger.warning(
                "All variants failed for %s_%s - %s",
                mut.gene, mut.mut, mut.variant)
            continue

        # Resolve the gene name once per mutation, before any header is
        # written, so the alt headers never see an unset or stale value
        # when the panel has no ref alleles.
        try:
            gene_name = mut.gene.name
        except AttributeError:
            gene_name = "NA"

        num_alts = len(variant_panel.alts)
        for i, ref in enumerate(variant_panel.refs):
            sys.stdout.write(
                ">ref-%s?var_name=%s&num_alts=%i&ref=%s&enum=%i&gene=%s&mut=%s\n" %
                (mut.mut, mut.variant.var_name, num_alts,
                 mut.reference, i, gene_name, mut.mut))
            sys.stdout.write("%s\n" % ref)

        for i, a in enumerate(variant_panel.alts):
            sys.stdout.write(
                ">alt-%s?var_name=%s&enum=%i&gene=%s&mut=%s\n" %
                (mut.mut, mut.variant.var_name, i, gene_name, mut.mut))
            sys.stdout.write("%s\n" % a)
示例#7
0
 def setup(self):
     """Store the reference path/name and build the amino-acid converter.

     Sets ``self.reference_filepath`` to the NC_000962.3 FASTA under
     ``DATA_DIR``, ``self.reference`` to that file's base name truncated at
     the first ``.fa``, and ``self.aa2dna`` to a
     ``GeneAminoAcidChangeToDNAVariants`` built from the FASTA and the
     matching GenBank file.
     """
     fasta_path = f"{DATA_DIR}/NC_000962.3.fasta"
     genbank_path = f"{DATA_DIR}/NC_000962.3.gb"

     self.reference_filepath = fasta_path
     # Everything before the first ".fa" in the file name is the reference name.
     fasta_name = os.path.basename(fasta_path)
     self.reference = fasta_name.split('.fa')[0]
     self.aa2dna = GeneAminoAcidChangeToDNAVariants(fasta_path, genbank_path)
示例#8
0
def run(parser, args):
    """Build variant probes and write them to stdout as FASTA records.

    The database is only contacted when ``args.no_backgrounds`` is false.
    Mutations are then collected from one of several sources, chosen in this
    order:

    * ``args.vcf`` set: ``run_make_probes_from_vcf_file(args)`` is called and
      no mutations are collected here.
    * ``args.genbank`` set: gene-level changes are expanded to DNA variant
      names with ``GeneAminoAcidChangeToDNAVariants``. They come from
      ``args.text_file`` (tab-separated ``gene, mutation, alphabet`` rows) or,
      without a text file, from ``args.variants`` (``"<gene>_<mutation>"``).
    * otherwise: mutations (and lineages) are loaded from ``args.text_file``
      via ``load_dna_vars_txt_file``; the lineages are dumped as JSON to
      ``args.lineage`` when that is set. Without a text file the names in
      ``args.variants`` are used verbatim.

    For every mutation a probe panel is requested from ``make_variant_probe``
    and each ref/alt allele is written as a header line followed by the
    sequence. Mutations with no panel are logged as warnings.

    Args:
        parser: Unused by this function.
        args: Parsed arguments; the attributes read are ``no_backgrounds``,
            ``db_name``, ``reference_filepath``, ``vcf``, ``genbank``,
            ``text_file``, ``variants``, ``lineage`` and ``kmer``.
    """
    # There's no need to try to connect to database if we're not doing backgrounds
    if args.no_backgrounds:
        logger.info(
            "Not connecting to database, because --no-backgrounds option used")
        DB = None
    else:
        DB = connect("%s-%s" % (DB_PREFIX, args.db_name))

    if DB is not None:
        try:
            # A ServerSelectionTimeoutError raised here is treated as "no
            # database available": probes are then built without DB.
            Variant.objects()
            logger.info("Connected to %s-%s", DB_PREFIX, args.db_name)
        except ServerSelectionTimeoutError:
            DB = None
            logger.warning(
                "Could not connect to database. Continuing without using genetic backgrounds"
            )

    mutations = []
    reference = os.path.basename(args.reference_filepath).split(".fa")[0]
    if args.vcf:
        run_make_probes_from_vcf_file(args)
    elif args.genbank:
        aa2dna = GeneAminoAcidChangeToDNAVariants(args.reference_filepath,
                                                  args.genbank)
        if args.text_file:
            with open(args.text_file, "r") as infile:
                reader = csv.reader(infile, delimiter="\t")
                for row in reader:
                    if not row:
                        # csv.reader yields [] for blank lines; skip them
                        # instead of failing on the unpacking below.
                        continue
                    gene, mutation_string, alphabet = row
                    # Anything not flagged as "DNA" is a protein-level change.
                    protein_coding_var = alphabet != "DNA"
                    for var_name in aa2dna.get_variant_names(
                            gene, mutation_string, protein_coding_var):
                        mutations.append(
                            Mutation(
                                reference=reference,
                                var_name=var_name,
                                gene=aa2dna.get_gene(gene),
                                mut=mutation_string,
                                protein_coding_var=protein_coding_var,
                            ))
        else:
            for variant in args.variants:
                gene, mutation = variant.split("_")
                for var_name in aa2dna.get_variant_names(gene, mutation):
                    mutations.append(
                        Mutation(
                            reference=reference,
                            var_name=var_name,
                            gene=gene,
                            mut=mutation,
                        ))
    else:
        if args.text_file:
            mutations, lineages = load_dna_vars_txt_file(
                args.text_file, reference)
            if args.lineage:
                with open(args.lineage, "w") as f:
                    json.dump(lineages, f, sort_keys=True, indent=2)
        else:
            mutations.extend(
                Mutation(reference=reference, var_name=v)
                for v in args.variants)

    al = AlleleGenerator(reference_filepath=args.reference_filepath,
                         kmer=args.kmer)
    total = len(mutations)
    for enum, mut in enumerate(mutations):
        if enum % 100 == 0:
            # Progress report; the percentage is shown with two decimals.
            logger.info(
                "%i of %i - %.2f%%", enum, total, 100 * enum / total)
        variant_panel = make_variant_probe(al,
                                           mut.variant,
                                           args.kmer,
                                           DB=DB,
                                           no_backgrounds=args.no_backgrounds)
        if variant_panel is None:
            logger.warning("All variants failed for %s_%s - %s",
                           mut.gene, mut.mutation_output_name, mut.variant)
            continue

        # Resolve the gene name once per mutation, before any header is
        # written, so the alt headers never see an unset or stale value
        # when the panel has no ref alleles.
        try:
            gene_name = mut.gene.name
        except AttributeError:
            gene_name = "NA"

        output_name = mut.mutation_output_name
        num_alts = len(variant_panel.alts)
        for i, ref in enumerate(variant_panel.refs):
            sys.stdout.write(
                ">ref-%s?var_name=%s&num_alts=%i&ref=%s&enum=%i&gene=%s&mut=%s\n"
                % (
                    output_name,
                    mut.variant.var_name,
                    num_alts,
                    mut.reference,
                    i,
                    gene_name,
                    output_name,
                ))
            sys.stdout.write("%s\n" % ref)

        for i, a in enumerate(variant_panel.alts):
            sys.stdout.write(
                ">alt-%s?var_name=%s&enum=%i&gene=%s&mut=%s\n" % (
                    output_name,
                    mut.variant.var_name,
                    i,
                    gene_name,
                    output_name,
                ))
            sys.stdout.write("%s\n" % a)