Exemplo n.º 1
0
def run_detection(record: Record, options: ConfigType,
                  module_results: Dict[str, Union[ModuleResults, Dict[str, Any]]]) -> Dict[str, float]:
    """ Runs all detection stages over a record: whole-genome detection first,
        then cluster prediction, then (if any regions were formed) the
        detection modules that are limited to genes inside clusters.

        Arguments:
            record: the Record to run detection over
            options: antiSMASH config
            module_results: a dictionary mapping a module's name to results from
                            a previous run on this module, as a ModuleResults subclass
                            or in JSON form

        Returns:
            the time taken by each detection module as a dictionary
    """
    timings = {}  # type: Dict[str, float]

    def run_and_annotate(module: Any) -> None:
        """ Runs a single module and, if results exist for it afterwards,
            adds those results to the record
        """
        run_module(record, cast(AntismashModule, module), options, module_results, timings)
        outcome = module_results.get(module.__name__)
        if not outcome:
            return
        assert isinstance(outcome, ModuleResults)
        logging.debug("Adding detection results from %s to record", module.__name__)
        outcome.add_to_record(record)

    # stage one: detection over the full genome
    for genome_module in (full_hmmer,):
        run_and_annotate(genome_module)

    # stage two: cluster prediction, collecting protoclusters and subregions
    logging.info("Detecting secondary metabolite clusters")
    for detector in (hmm_detection, cassis, clusterfinder_probabilistic):
        run_module(record, cast(AntismashModule, detector), options, module_results, timings)
        predictions = module_results.get(detector.__name__)
        if not predictions:
            continue
        assert isinstance(predictions, DetectionResults)
        for protocluster in predictions.get_predicted_protoclusters():
            record.add_protocluster(protocluster)
        for subregion in predictions.get_predicted_subregions():
            record.add_subregion(subregion)

    logging.debug("%d protoclusters found", len(record.get_protoclusters()))
    logging.debug("%d subregions found", len(record.get_subregions()))

    # build the higher level areas from whatever was predicted above
    record.create_candidate_clusters()
    record.create_regions()

    if record.get_regions():
        logging.info("%d region(s) detected in record", len(record.get_regions()))
    else:
        # nothing for the cluster-limited modules to work on, so mark and stop here
        logging.info("No regions detected, skipping record")
        record.skip = "No regions detected"
        return timings

    # stage three: detection limited to genes in clusters
    for cluster_module in (nrps_pks_domains, cluster_hmmer, genefunctions):
        run_and_annotate(cluster_module)

    return timings
Exemplo n.º 2
0
 def test_genbank(self):
     # build a record holding two clusters, one subregion, and a region over all of them
     record = Record(Seq("A" * 100, generic_dna))
     clusters = [
         create_cluster(3, 20, "prodA"),
         create_cluster(25, 41, "prodB")
     ]
     for item in clusters:
         record.add_cluster(item)

     subregion = SubRegion(FeatureLocation(35, 71), "test", 0.7)
     record.add_subregion(subregion)

     supercluster = SuperCluster(SuperCluster.kinds.NEIGHBOURING, clusters)
     record.add_supercluster(supercluster)

     region = Region(superclusters=[supercluster], subregions=[subregion])
     record.add_region(region)

     # write the region out as genbank and read it straight back in
     with NamedTemporaryFile(suffix=".gbk") as handle:
         region.write_to_genbank(handle.name)
         parsed = list(seqio.parse(handle.name))
     assert len(parsed) == 1

     converted = Record.from_biopython(parsed[0], taxon="bacteria")
     assert len(converted.get_regions()) == 1

     # coordinates in the reloaded region are relative to the start of the original region
     reloaded = converted.get_region(0)
     offset = region.location.start
     assert reloaded.location.start == 3 - offset
     assert reloaded.location.end == 71 - offset
     assert reloaded.products == region.products
     assert reloaded.probabilities == region.probabilities
Exemplo n.º 3
0
    def test_prepeptide_adjustment(self):
        # a 400 base record with a single region built from one subregion at 100-300
        record = Record(Seq("A"*400, generic_dna))
        subregion = DummySubRegion(start=100, end=300)
        record.add_subregion(subregion)
        region = Region(subregions=[subregion])
        record.add_region(region)

        # the prepeptide carries one simple and one compound part location,
        # so that both FeatureLocation and CompoundLocation are exercised
        prepeptide = DummyFeature(200, 230, 1, "CDS_motif")
        leader_location = FeatureLocation(200, 210, 1)
        tail_location = CompoundLocation([FeatureLocation(220, 223, -1),
                                          FeatureLocation(227, 230, -1)])
        prepeptide._qualifiers["leader_location"] = [str(leader_location)]
        prepeptide._qualifiers["tail_location"] = [str(tail_location)]
        record.add_feature(prepeptide)
        # a CDS_motif with neither qualifier (e.g. NRPS/PKS motif) must not break anything
        record.add_feature(DummyFeature(250, 280, 1, "CDS_motif"))

        with NamedTemporaryFile(suffix=".gbk") as handle:
            region.write_to_genbank(handle.name)
            parsed = list(seqio.parse(handle.name))
            bio = parsed[0]
        assert len(bio.features) == 4

        found = False
        for feature in bio.features:
            qualifiers = feature.qualifiers
            tail = qualifiers.get("tail_location")
            leader = qualifiers.get("leader_location")
            if not (tail and leader):
                continue
            # the part locations should now be adjusted backwards 100 bases
            assert leader == ["[100:110](+)"]
            assert tail == ["join{[120:123](-), [127:130](-)}"]
            found = True
        assert found, "prepeptide feature missing in conversion"
Exemplo n.º 4
0
def generate_results(record: Record,
                     options: ConfigType) -> ClusterFinderResults:
    """ Runs the probabilistic cluster search and wraps each prediction in a
        SubRegion tagged with the "clusterfinder" tool.

        If options.cf_create_clusters is set, every new subregion is also
        added to the record before the results are returned.

        Arguments:
            record: the Record to search and, optionally, to add subregions to
            options: the config object, read for cf_create_clusters

        Returns:
            a ClusterFinderResults built from the record id, the new subregions,
            and the cf_create_clusters option
    """
    subregions = [
        SubRegion(hit.location, tool="clusterfinder", probability=hit.probability)
        for hit in find_probabilistic_clusters(record, options)
    ]

    if options.cf_create_clusters:
        for subregion in subregions:
            record.add_subregion(subregion)

    return ClusterFinderResults(record.id, subregions,
                                create=options.cf_create_clusters)
Exemplo n.º 5
0
 def add_to_record(self, record: Record) -> None:
     """ Stores this result's promoters for the record via store_promoters,
         then adds each of this result's subregions to the record.

         Arguments:
             record: the Record to add to
     """
     store_promoters(self.promoters, record)
     for subregion in self.subregions:
         record.add_subregion(subregion)
Exemplo n.º 6
0
 def add_to_record(self, record: Record) -> None:
     """ Adds each of this result's areas to the record as a subregion,
         unless the result was flagged as creating new clusters.

         Arguments:
             record: the Record to add to
     """
     # when new clusters are created, get_predicted_subregions covered it already
     if not self.create_new_clusters:
         for subregion in self.areas:
             record.add_subregion(subregion)