Пример #1
0
    def calculate_area(nested_genotype: pandas.Series,
                       unnested_genotype: pandas.Series) -> float:
        """
        Score how likely it is that the unnested genotype is a subset of the nested genotype.

        The unnested genotype is tested against two candidate containers: the nested
        genotype itself and an `other` series made of everything outside the nested genotype.

        Parameters
        ----------
        nested_genotype, unnested_genotype: pandas.Series

        Returns
        -------
        -2 when the unnested genotype is not a subset of the nested genotype,
        2 when it is a subset of the nested genotype only, and
        1 or 0 when it is a subset of both, depending on whether the area shared with the
        nested genotype is more than twice the area shared with `other`.
        """
        # Whatever is not part of the nested genotype is lumped into a single `other` series.
        # NOTE(review): `1 - x` assumes frequencies are expressed on a 0-1 scale - confirm.
        # noinspection PyTypeChecker
        complement: pandas.Series = 1 - nested_genotype

        nested_area = areascore.area_of_series(nested_genotype)
        unnested_area = areascore.area_of_series(unnested_genotype)
        complement_area = areascore.area_of_series(complement)

        shared_with_nested = areascore.calculate_common_area(
            nested_genotype, unnested_genotype)
        shared_with_complement = areascore.calculate_common_area(
            complement, unnested_genotype)

        fits_in_nested = areascore.is_subset_legacy(
            nested_area, unnested_area, shared_with_nested)
        fits_in_complement = areascore.is_subset_legacy(
            complement_area, unnested_area, shared_with_complement)

        if not fits_in_nested:
            return -2
        if not fits_in_complement:
            return 2
        # Evidence for both containers: favour the nested genotype only when it shares
        # more than twice as much area with the unnested genotype as `other` does.
        return int(shared_with_nested > 2 * shared_with_complement)
Пример #2
0
def test_area_of_series_real():
    """Compare `area_of_series` against `calculate_area` for every genotype in the first real table."""
    table_paths = list(real_tables.values())
    workbook = table_paths[0]

    sheet = pandas.read_excel(workbook, sheet_name="genotype")
    genotypes = sheet.set_index('Genotype')

    for label, trajectory in genotypes.iterrows():
        logger.debug(label)
        reference = areascore.calculate_area(trajectory)
        computed = areascore.area_of_series(trajectory)
        # The two implementations only need to agree to within 0.001.
        assert pytest.approx(computed, abs=.001) == reference
Пример #3
0
def test_calculate_overlapping_area(full_overlap, no_overlap, partial_overlap):
    """Exercise `calculate_common_area` on fully, non- and partially overlapping series pairs."""
    tolerance = 0.001

    # Full overlap: the shared area should equal the area of the right-hand series.
    outer, inner = full_overlap
    right_area = areascore.area_of_series(inner)
    shared = areascore.calculate_common_area(outer, inner)
    assert pytest.approx(shared, abs=tolerance) == right_area

    # No overlap: nothing is shared.
    first, second = no_overlap
    shared = areascore.calculate_common_area(first, second)
    assert pytest.approx(shared, abs=tolerance) == 0

    # Partial overlap.
    # NOTE(review): `right_area` still holds the value computed from the `full_overlap`
    # fixture above - confirm this is intended rather than a stale variable.
    first, second = partial_overlap
    shared = areascore.calculate_common_area(first, second)
    assert pytest.approx(shared, abs=tolerance) == right_area

    # Literal trajectories: the shared area should equal the full area of `smaller`.
    larger = pandas.Series([
        0.01, 0.279, 0.341, 0.568, 0.708, 0.913,
        0.756, 0.455, 0.399, 0.13, 0.041,
    ])
    smaller = pandas.Series([
        0, 0, 0, 0, 0, 0.247,
        0.388, 0.215, 0.399, 0.13, 0.028,
    ])
    shared = areascore.calculate_common_area(larger, smaller)
    assert pytest.approx(shared) == areascore.area_of_series(smaller)
Пример #4
0
def test_area_of_series(series, expected):
    """Check that `areascore.area_of_series` returns the expected area for a parametrized series."""
    s = pandas.Series(series)
    result = areascore.area_of_series(s)

    # The previous `assert pytest.approx(result, expected)` passed `expected` as the relative
    # tolerance and asserted the approx object itself, so nothing was ever compared.
    # NOTE(review): the absolute tolerance mirrors the other area tests; adjust it if the
    # parametrized expectations are rounded more coarsely.
    assert pytest.approx(result, abs=0.001) == expected
Пример #5
0
def jaccard_distance(left: pandas.Series, right: pandas.Series) -> float:
    """
    Return the Jaccard distance between the areas of two series.

    The distance is `1 - shared / union`, where `shared` is the common area of the two
    series and `union` is `area(left) + area(right) - shared`.

    Parameters
    ----------
    left, right: pandas.Series

    Returns
    -------
    float
        The Jaccard distance, or 0.0 when the union of the two areas is zero.
    """
    area_left = area_of_series(left)
    area_right = area_of_series(right)
    area_shared = calculate_common_area(left, right)
    area_union = area_left + area_right - area_shared

    if area_union == 0:
        # Neither series encloses any area, so the ratio is undefined (0 / 0).
        # Two empty shapes are treated as identical instead of raising or returning NaN.
        return 0.0

    return 1 - area_shared / area_union
Пример #6
0
def test_shoelace(trajectory_table, key, expected):
    """Check the area computed for one row of the trajectory table against its expected value."""
    # Retained for now as a guard that the area is still being calculated correctly.
    trajectory = trajectory_table.loc[key]
    computed_area = areascore.area_of_series(trajectory)
    approximate_area = pytest.approx(computed_area, abs=0.01)

    assert approximate_area == expected
Пример #7
0
    def calculate_score_area(self, nested_genotype: pandas.Series,
                             unnested_genotype: pandas.Series) -> float:
        """
        Score whether the unnested genotype is plausibly a subset of the nested genotype, based on polygon areas.

        Both the nested genotype and an `other` series (the frequency limit minus the larger
        of the two genotypes) are tested as possible containers of the unnested genotype.

        Parameters
        ----------
        nested_genotype, unnested_genotype: pandas.Series

        Returns
        -------
        float
            -1, 0 or 1, multiplied by `self.weight_jaccard`.
        """
        # Group everything outside the larger genotype (judged by the mean difference) into an
        # `other` category. Using the larger one covers the case where we are testing whether
        # a small genotype contains a large genotype.
        if (nested_genotype - unnested_genotype).mean() > 0:
            larger_genotype = nested_genotype
        else:
            larger_genotype = unnested_genotype
        # noinspection PyTypeChecker
        other_genotypes: pandas.Series = self.flimit - larger_genotype

        # Since the flimit is not exactly 1 the subtraction can go negative; replace those
        # values with a small positive number.
        other_genotypes = other_genotypes.mask(lambda s: s < 0, 0.0001)

        unnested_polygon = polygon.as_polygon(unnested_genotype)
        nested_polygon = polygon.as_polygon(nested_genotype)
        # Built once and reused below; `other_genotypes` does not change after this point.
        other_polygon = polygon.as_polygon(other_genotypes)

        is_subset_nested = areascore.is_subset_polygon(nested_polygon,
                                                       unnested_polygon)
        is_subset_other = areascore.is_subset_polygon(other_polygon,
                                                      unnested_polygon)
        # Check the reverse case: is the nested genotype contained in the unnested one?
        is_subset_nested_reversed = areascore.is_subset_polygon(
            unnested_polygon, nested_polygon)

        nested_area = areascore.area_of_series(nested_genotype)
        unnested_area = areascore.area_of_series(unnested_genotype)
        common_area_nested = areascore.X_and_Y_polygon(unnested_polygon,
                                                       nested_polygon)
        # This does not distinguish between xor left vs xor right.
        xor_area_unnested = areascore.difference_polygon(
            unnested_polygon, nested_polygon)

        if self.debug:
            logger.debug(
                f"calculate_score_area()->({is_subset_nested}, {is_subset_other}, {is_subset_nested_reversed}), ({common_area_nested:.2f}, {xor_area_unnested:.2f})"
            )

        if is_subset_nested and is_subset_other:
            # Evidence for both scenarios.
            # Test if the nested genotype is sufficiently large to assume the unnested genotype
            # is a subset, looking only at the area where the unnested genotype was detected.
            common_area_other = areascore.X_and_Y_polygon(
                unnested_polygon, other_polygon)
            score = int(common_area_nested > 2 * common_area_other)
        elif is_subset_nested:
            score = 1
        elif is_subset_nested_reversed and not is_subset_other:
            score = -1
        else:
            score = 0

        # Penalties: a neutral score becomes -1 when the non-shared area is more than twice
        # the shared area; otherwise any score becomes -1 when the unnested genotype covers
        # more than twice the area of the nested genotype.
        if score == 0 and xor_area_unnested > common_area_nested * 2:
            score = -1
        elif unnested_area > 2 * nested_area:
            score = -1

        return score * self.weight_jaccard