示例#1
0
    def test_simple_simple(self):
        """ Overlap checks between two simple locations, both on the same
            strand and on opposing strands.
        """
        def overlaps(first, first_strand, second):
            # fresh locations are built for every single check, the second
            # location is always on the forward strand
            return locations_overlap(FeatureLocation(*first, strand=first_strand),
                                     FeatureLocation(*second, strand=1))

        # same strand: disjoint, fully containing, partially overlapping
        assert not overlaps((1, 5), 1, (10, 15))
        assert overlaps((1, 25), 1, (10, 15))
        assert overlaps((1, 12), 1, (10, 15))

        # opposing strands: shared coordinates are expected to count as an
        # overlap, a location starting where the other ends is not
        assert overlaps((12, 22), -1, (10, 15))
        assert not overlaps((12, 22), -1, (10, 12))
示例#2
0
 def overlaps_with(self, other: Union["Feature", Location]) -> bool:
     """ Checks whether the location of this feature overlaps with the given
         feature or location.

         Arguments:
             other: the Feature, CompoundLocation, or FeatureLocation to
                    compare with

         Returns:
             the result of locations_overlap() on this feature's location
             and the other location

         Raises:
             TypeError: if other is none of the accepted types
     """
     if not isinstance(other, (Feature, CompoundLocation, FeatureLocation)):
         # NOTE(review): the message talks about a "Container", although
         # nothing here treats other as a container - confirm the wording
         raise TypeError("Container must be a Feature, CompoundLocation, or FeatureLocation, not %s" % type(other))
     # features are compared via their location, locations are used directly
     other_location = other.location if isinstance(other, Feature) else other
     return locations_overlap(self.location, other_location)
示例#3
0
    def test_compound_compound(self):
        """ Overlap checks between two compound locations, in both argument
            orders.
        """
        first = build_compound([(0, 10), (20, 30), (40, 50)], strand=1)
        cases = [
            # every part of the second sits outside all parts of the first
            ([(12, 18), (32, 38), (52, 58)], 1, False),
            # the middle part reaches into (20, 30)
            ([(12, 18), (28, 38), (52, 58)], 1, True),
            # the last part reaches into (40, 50), on the opposite strand
            ([(12, 18), (32, 38), (42, 58)], -1, True),
        ]
        for parts, strand, expected in cases:
            second = build_compound(parts, strand=strand)
            assert bool(locations_overlap(first, second)) == expected
            assert bool(locations_overlap(second, first)) == expected
示例#4
0
    def test_mixed(self):
        """ Overlap checks between a simple and a compound location, in both
            argument orders.
        """
        compound = build_compound([(0, 10), (20, 30), (40, 50)], strand=1)
        cases = [
            (15, 17, False),  # between the first two parts
            (22, 25, True),  # inside the second part
            (35, 45, True),  # runs into the third part
        ]
        for start, end, expected in cases:
            simple = FeatureLocation(start, end)
            assert bool(locations_overlap(simple, compound)) == expected
            assert bool(locations_overlap(compound, simple)) == expected
def remove_redundant_protoclusters(clusters: List[Protocluster],
                                   rules_by_name: Dict[str, rule_parser.DetectionRule]
                                   ) -> List[Protocluster]:
    """ Filters out protoclusters that are made redundant by a protocluster
        of a superior rule.

        A protocluster is dropped when the core location of any protocluster
        with a product listed in its own rule's superiors overlaps with its
        own core location.

        Arguments:
            clusters: the protoclusters to filter
            rules_by_name: a dictionary mapping product name to the rule
                           providing the superiors of that product

        Returns:
            a new list of the protoclusters that were kept, in their
            original order
    """
    by_product: Dict[str, List[Protocluster]] = defaultdict(list)
    for cluster in clusters:
        by_product[cluster.product].append(cluster)

    kept = []
    for cluster in clusters:
        superiors = rules_by_name[cluster.product].superiors
        # stops at the first overlapping superior protocluster;
        # .get() avoids adding empty entries for absent products
        overlapped = any(
            locations_overlap(candidate.core_location, cluster.core_location)
            for superior in superiors
            for candidate in by_product.get(superior, [])
        )
        if not overlapped:
            kept.append(cluster)
    return kept
示例#6
0
def generate_javascript_data(_record: Record, region: Region,
                             results: ClusterCompareResults) -> Dict[str, Any]:
    """ Builds the JSON-friendly data the javascript uses to draw the
        comparison results of a single region in the HTML output

        Arguments:
            _record: the Record the results belong to (not used here)
            region: the specific Region to generate data for
            results: the ClusterCompareResults to extract the data from

        Returns:
            a dictionary mapping database label to a dictionary mapping
            variant to the data of that variant's reference clusters
    """
    def midpoint(location: Any) -> Any:
        """ The start of the location plus half its length, rounded down """
        return location.start + (location.end - location.start) // 2

    data: Dict[str, Any] = {}
    for label, db_results in results.by_database.items():
        by_variant: Dict[str, Any] = {}
        data[label] = by_variant
        region_results = db_results.by_region.get(region.get_region_number(), {})
        for variant, result in sorted(region_results.items()):
            # ordered by the second element of each entry (presumably the
            # score), highest first, and cut off at the display limit
            ranked = sorted(result.scores_by_region, key=lambda pair: pair[1], reverse=True)
            best = ranked[:DISPLAY_LIMIT]
            # variants without any entries are left out completely
            if not best:
                continue

            reference_clusters: Dict[str, Any] = {}
            by_variant[variant] = {"reference_clusters": reference_clusters}

            for reference, _ in best:
                ref_entry: Dict[str, Any] = {
                    "start": reference.start,
                    "end": reference.end,
                    "links": [],  # filled in from the hits below
                    "reverse": False,  # recalculated once all hits are handled
                }
                # minimal JSON for every CDS of the reference, each with a
                # mapping that collects the query genes linked to it
                ref_genes: Dict[str, Any] = {}
                for ref_gene in reference.cdses.values():
                    minimal = ref_gene.get_minimal_json()
                    minimal["linked"] = {}
                    ref_genes[ref_gene.name] = minimal
                reference_clusters[reference.get_identifier()] = ref_entry

                strand_mismatches = 0
                hits = result.hits_by_region.get(reference, {})
                for ref_cds_id, hit in hits.items():
                    assert locations.locations_overlap(hit.cds.location, region.location)
                    query_cds = hit.cds
                    query_point = midpoint(query_cds.location)
                    ref_cds = reference.cdses[ref_cds_id]
                    subject_point = midpoint(ref_cds.location)
                    if query_cds.location.strand != ref_cds.location.strand:
                        strand_mismatches += 1
                    query_name = query_cds.get_name()
                    ref_genes[ref_cds.name]["linked"][region.get_region_number()] = query_name
                    ref_entry["links"].append({
                        "query": query_name,
                        "subject": ref_cds.name,
                        "query_loc": query_point,
                        "subject_loc": subject_point,
                    })
                # flagged as reversed when more than half of the links pair
                # up genes on differing strands
                ref_entry["reverse"] = strand_mismatches > len(ref_entry["links"]) / 2
                ref_entry["genes"] = list(ref_genes.values())
    return data