Пример #1
0
def test_get_valid_points_simple():
    """Check `widgets.get_valid_points` on a simple rise-and-fall pair of series.

    Three call shapes are exercised: detection limit only, detection limit
    plus fixed limit, and both limits combined with `inner=True`.
    """
    left = pandas.Series([0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1, 0])
    right = pandas.Series([0, 0, 0, .1, .2, .3, .3, .3, .3, 0, 0, 0])

    # Detection limit only: positions 0 and 11, where both series are zero,
    # are expected to be dropped.
    result_left, result_right = widgets.get_valid_points(left, right, 0.03)
    assert result_left.tolist() == [.1, .2, .3, .4, .5, .6, .7, .8, .9, 1]
    assert result_right.tolist() == [0, 0, .1, .2, .3, .3, .3, .3, 0, 0]

    # Adding the fixed limit: position 10 (left == 1) is expected to go too.
    result_left, result_right = widgets.get_valid_points(
        left, right, 0.03, 0.97)
    assert result_left.tolist() == [.1, .2, .3, .4, .5, .6, .7, .8, .9]
    assert result_right.tolist() == [0, 0, .1, .2, .3, .3, .3, .3, 0]

    # inner=True: only positions 3..8, where both series are above the
    # detection limit, are expected to remain.
    result_left, result_right = widgets.get_valid_points(left,
                                                         right,
                                                         0.03,
                                                         0.97,
                                                         inner=True)
    assert result_left.tolist() == [.3, .4, .5, .6, .7, .8]
    assert result_right.tolist() == [.1, .2, .3, .3, .3, .3]
Пример #2
0
def test_get_valid_points_complex():
    """Check `widgets.get_valid_points` on a pair where one series fixes.

    Covers the default (outer) selection, argument-order symmetry, the inner
    selection, and the inner selection combined with a fixed limit.
    """
    left = pandas.Series(
        [0.00, 0.00, 0.000, 0.00, 0.00, 0.263, 0.07, 0.081, 0.069, 0.042])
    right = pandas.Series(
        [0.00, 0.00, 0.170, 0.55, 0.947, 1.00, 1.00, 1.00, 1.00, 1.00])

    # Outer selection: the first two timepoints (both series zero) are
    # expected to be dropped.
    outer_left = [0.000, 0.00, 0.00, 0.263, 0.07, 0.081, 0.069, 0.042]
    outer_right = [0.170, 0.55, 0.947, 1.00, 1.00, 1.00, 1.00, 1.00]
    got_left, got_right = widgets.get_valid_points(left, right, dlimit=0.03)
    assert got_left.tolist() == outer_left
    assert got_right.tolist() == outer_right

    # Swapping the arguments must simply swap the results.
    swapped_first, swapped_second = widgets.get_valid_points(right, left, 0.03)
    assert swapped_first.tolist() == outer_right
    assert swapped_second.tolist() == outer_left

    # Inner selection: only the timepoints where both series are detected.
    inner_left = [0.263, 0.07, 0.081, 0.069, 0.042]
    inner_right = [1.00, 1.00, 1.00, 1.00, 1.00]
    got_left, got_right = widgets.get_valid_points(left,
                                                   right,
                                                   0.03,
                                                   inner=True)
    assert got_left.tolist() == inner_left
    assert got_right.tolist() == inner_right

    # With the fixed limit added, nothing is expected to survive the inner
    # selection.
    got_left, got_right = widgets.get_valid_points(left,
                                                   right,
                                                   0.03,
                                                   0.97,
                                                   inner=True)
    assert got_left.tolist() == []
    assert got_right.tolist() == []
Пример #3
0
def filter_timepoints(left_trajectory: pandas.Series,
                      right_trajectory: pandas.Series, dlimit: float,
                      flimit: float) -> FilterType:
    """
    Filter the available timepoints of a pair of trajectories based on the
    category returned by `get_pair_category`.

    Parameters
    ----------
    left_trajectory, right_trajectory: pandas.Series
        The two trajectories to reduce.
    dlimit: float
        Detection limit forwarded to `get_pair_category` and
        `widgets.get_valid_points`.
    flimit: float
        Fixed limit forwarded to `get_pair_category` and, for every category
        except 'oneFixed', to `widgets.get_valid_points`.

    Returns
    -------
    A `(left_reduced, right_reduced)` pair; both entries are None for the
    'onlyFixed' category.

    Raises
    ------
    ValueError
        If the pair category is not one of the recognized values.
    """
    # Legacy behaviour, for reference: the fixed limit was only passed when
    # both trajectories had the same "was fixed" status.
    category = get_pair_category(left_trajectory,
                                 right_trajectory,
                                 dlimit=dlimit,
                                 flimit=flimit)

    if category == 'onlyFixed':
        return None, None

    if category == 'oneFixed':
        # The only category that is filtered without the fixed limit.
        left_reduced, right_reduced = widgets.get_valid_points(
            left_trajectory, right_trajectory, dlimit=dlimit, inner=False)
        return left_reduced, right_reduced

    if category in ('partiallyFixed', 'notFixed', 'bothFixed'):
        # For 'partiallyFixed' there is no overlap between the series, so the
        # overlap between the "fixed" regions has to be relied upon; the other
        # two categories use the very same call.
        left_reduced, right_reduced = widgets.get_valid_points(
            left_trajectory,
            right_trajectory,
            dlimit=dlimit,
            flimit=flimit,
            inner=False)
        return left_reduced, right_reduced

    message = f"Got an invalid category for a pair of trajectories: '{category}'"
    logger.error(message)
    logger.error(left_trajectory.tolist())
    logger.error(right_trajectory.tolist())
    raise ValueError(message)
Пример #4
0
def test_get_detected_points_inner():
	"""With `inner=True` only the timepoint where both series exceed the detection limit should remain."""
	left = pandas.Series([0, 0, 0, 0, 0, 0, 0.085, 0.001, 0.005])
	right = pandas.Series([0, 0, 0, 0, 0, 0, 0.05, 0.55, 0.5])

	valid_left, valid_right = widgets.get_valid_points(left, right, dlimit = 0.03, inner = True)

	# Position 6 is the only one where both values are above 0.03.
	assert valid_left.tolist() == [0.085]
	assert valid_right.tolist() == [0.05]
Пример #5
0
    def calculate_score_above_fixed(self, left: pandas.Series,
                                    right: pandas.Series) -> int:
        """
        Test whether two genotypes consistently sum to a value greater than
        the fixed breakpoint. This suggests that one of the genotypes is in
        the background of the other, since otherwise the maximum combined
        frequency should, at most, be equal to the fixed cutoff value.
        Keep in mind that the variance is defined as the uncertainty in the
        measurements rather than computed using the given values.

        Parameters
        ----------
        left, right: pandas.Series
            The two genotype series to test.

        Returns
        -------
        int
            1 when the test passes, 0 otherwise.
        """
        # Including points where one genotype was not detected will skew the results.
        left, right = widgets.get_valid_points(left,
                                               right,
                                               dlimit=self.dlimit,
                                               inner=True)
        # The first remaining timepoint is left out of the combined series.
        summed = (left + right).tolist()[1:]
        n_points = len(summed)

        if n_points == 0:
            return 0
        if n_points == 1:
            # Too few points for a t-test: compare the single sum directly.
            return int(summed[0] > self.flimit)

        statistic, pvalue = self._multiple_sample_ttest(left, right)
        # The test is two-sided, so halve the p-value and require a positive
        # statistic to turn it into a one-sided test.
        passed = pvalue / 2 < self.pvalue and statistic > 0
        return int(passed)
Пример #6
0
    def run(self, left: pandas.Series, right: pandas.Series):
        """
        Plot the point-to-point changes of two series against each other and
        log their correlation and anticorrelation scores.

        Parameters
        ----------
        left, right: pandas.Series
            The two series to compare. They are reduced with
            `widgets.get_valid_points` (limits 0.03 and 0.97, `inner=True`)
            before differencing.
        """
        valid_left, valid_right = widgets.get_valid_points(left,
                                                           right,
                                                           0.03,
                                                           0.97,
                                                           inner=True)

        delta_left = valid_left.diff()
        delta_right = valid_right.diff()
        # The first element of `diff()` is NaN, so it is skipped when scoring.
        scored_pairs = list(zip(delta_left, delta_right))[1:]

        _, axes = plt.subplots(figsize=(15, 15))
        axes.scatter(delta_left.values, delta_right.values)

        axes.set_xlabel("left derivative")
        axes.set_ylabel("right derivative")

        axes.set_xlim(-1, 1)
        axes.set_ylim(-1, 1)

        # Axes through the origin plus the two diagonals as visual guides.
        axes.axhline(0)
        axes.axvline(0)
        axes.plot([-1, 1], [-1, 1])
        axes.plot([-1, 1], [1, -1])

        correlated = sum(distance_correlated(pair) for pair in scored_pairs)
        anticorrelated = sum(
            distance_anticorrelated(pair) for pair in scored_pairs)

        logger.info(f"Correlation score: {correlated}")
        logger.info(f"Anticorrelation score: {anticorrelated}")

        plt.show()
0
def helper_for_summation_check(model: pandas.DataFrame, left: str,
                               right: str) -> int:
    """
    Run the above-fixed score for two rows of `model`.

    Parameters
    ----------
    model: pandas.DataFrame
        Table whose rows are looked up by label.
    left, right: str
        Row labels of the two series to compare.

    Returns
    -------
    int
        The result of `scoring.Score.calculate_score_above_fixed`.
    """
    # NOTE(review): presumably the Score arguments are the detection limit,
    # fixed limit and p-value cutoff, in that order -- confirm against
    # scoring.Score.
    detection_limit = 0.03
    detected_left, detected_right = widgets.get_valid_points(model.loc[left],
                                                             model.loc[right],
                                                             detection_limit,
                                                             inner=False)
    scorer = scoring.Score(detection_limit, 0.97, 0.05)
    return scorer.calculate_score_above_fixed(detected_left, detected_right)
Пример #8
0
    def calculate_score_greater(self, nested_genotype: pandas.Series,
                                unnested_genotype: pandas.Series) -> float:
        """
        Score a pair of genotypes with `calculate_score_greater_basic`.

        Parameters
        ----------
        nested_genotype, unnested_genotype: pandas.Series
            The candidate nested genotype and the genotype it is compared
            against.

        Returns
        -------
        float
            `-self.weight_greater` when `widgets.overlap` reports no overlap
            between the two series; otherwise the result of
            `calculate_score_greater_basic` on the valid points of the pair.
        """
        series_overlap = widgets.overlap(nested_genotype, unnested_genotype,
                                         self.dlimit)
        if series_overlap == 0:
            # Cast here as well so that every return path yields a float.
            return float(self.weight_greater * -1)

        # The t-test has to be corrected for the case where the two series do not completely overlap
        nested_genotype, unnested_genotype = widgets.get_valid_points(
            nested_genotype, unnested_genotype, self.dlimit)
        score = self.calculate_score_greater_basic(nested_genotype,
                                                   unnested_genotype)

        return float(score)  # Cast to float so the dtypes are consistent
Пример #9
0
    def calculate_score_derivative(self, left: pandas.Series,
                                   right: pandas.Series) -> float:
        """
        Test whether the two series are correlated or anticorrelated with each
        other. The unweighted scoring is as follows:
            correlated: 1
            uncorrelated (or no valid timepoints): 0
            anticorrelated: -1
        The result is multiplied by `self.weight_derivative`.

        Parameters
        ----------
        left, right: pandas.Series
            The two series to test.

        Returns
        -------
        The unweighted score multiplied by `self.weight_derivative`.
        """
        valid_left, valid_right = widgets.get_valid_points(left,
                                                           right,
                                                           self.dlimit,
                                                           self.flimit,
                                                           inner=True)

        if valid_left.empty:
            score = 0
        else:
            # A covariance-based fallback used to follow this branch, but its
            # guard (`len(valid_left) > 20 or True`) was always true, so the
            # derivative-based check is the only path that ever ran.
            dotproduct, _ = self.derivative(valid_left, valid_right)
            if dotproduct > 0.01:
                score = 1
            elif dotproduct < -0.01:
                score = -1
            else:
                score = 0
        return score * self.weight_derivative
Пример #10
0
    def calculate_derivative_score(self, left: pandas.Series,
                                   right: pandas.Series) -> float:
        """
        Test whether the two series are correlated or anticorrelated with each
        other, based on their covariance. The scoring is as follows:
            correlated: 2
            uncorrelated: 0
            anticorrelated: -2

        Parameters
        ----------
        left, right: pandas.Series
            The two series to test.
        """
        # Pandas implementation of the derivative check, since it basically just checks for covariance.
        valid_left, valid_right = widgets.get_valid_points(
            left, right, self.dlimit, self.flimit)
        covariance = math.nan if valid_left.empty else valid_left.cov(
            valid_right)

        # A NaN covariance fails both comparisons and therefore scores 0.
        if covariance > 0.01:
            return 2
        if covariance < -0.01:
            return -2
        return 0
Пример #11
0
def test_get_detected_points(left, right, index):
	"""Both reduced series must share one index, and it must equal the expected `index`."""
	left_series = pandas.Series(left)
	right_series = pandas.Series(right)

	reduced_left, reduced_right = widgets.get_valid_points(left_series, right_series, 0.03)

	reduced_left_index = list(reduced_left.index)
	assert reduced_left_index == list(reduced_right.index)
	assert reduced_left_index == index
Пример #12
0
    def score_pair(self, nested_genotype: pandas.Series,
                   unnested_trajectory) -> Dict[str, float]:
        """
        Compute every score for a (nested, unnested) pair of series.

        Parameters
        ----------
        nested_genotype: pandas.Series
            The candidate nested genotype.
        unnested_trajectory
            The series it is scored against.
            NOTE(review): presumably also a pandas.Series, since `.name`,
            `.values` and `.index` are read from it -- confirm.

        Returns
        -------
        Dict[str, float]
            The names of both series together with the individual scores
            ('scoreGreater', 'scoreFixed', 'scoreArea', 'scoreDerivative')
            and their sum ('totalScore').
        """
        detected_left, detected_right = widgets.get_valid_points(
            nested_genotype,
            unnested_trajectory,
            dlimit=self.dlimit,
            inner=False)
        if self.debug:
            logger.debug("Scoring a pair of series:")
            for series in (nested_genotype, unnested_trajectory):
                logger.debug(
                    f"\t{series.name}\t{series.values}\t{series.index}")
            logger.debug(f"{self.dlimit}, {self.flimit}, {self.pvalue}")
            logger.debug("The detected portion of the series: ")
            for detected in (detected_left, detected_right):
                logger.debug(f"\t{detected.values}")

        # Fewer than three detected points go to the legacy summation score.
        fixed_scorer = (self.legacy_scorer.calculate_summation_score
                        if len(detected_left) < 3 else
                        self.calculate_score_above_fixed)
        score_fixed = fixed_scorer(detected_left, detected_right)

        score_greater = self.calculate_score_greater(detected_left,
                                                     detected_right)
        if math.isnan(score_greater):
            score_greater = 0
        score_area = self.calculate_score_area(nested_genotype,
                                               unnested_trajectory)

        total_score = score_fixed + score_greater + score_area
        if self.debug:
            logger.debug(
                f"{nested_genotype.name}\t{unnested_trajectory.name}\t{score_fixed}\t{score_greater}\t{score_area}\t{total_score}"
            )

        score_derivative = 0
        if total_score > 0:
            # The derivative check is only useful when deciding between possible candidates, since it does not provide evidence itself that a
            # genotype is a potential background. So, at least one of the other checks should have been passed with no
            # evidence against the candidate background.

            # The derivative score should only be computed using the timepoints where the series overlap.
            overlap_left, overlap_right = widgets.get_valid_points(
                nested_genotype,
                unnested_trajectory,
                dlimit=self.dlimit,
                inner=True)
            score_derivative = self.calculate_score_derivative(
                overlap_left, overlap_right)
            # Note that a previous version accidentally added the derivative cutoff to the total score.
            total_score += score_derivative
            # A NaN derivative score is reported as 0; the total keeps
            # whatever the addition above produced.
            if math.isnan(score_derivative):
                score_derivative = 0

        return {
            'nestedGenotype': nested_genotype.name,
            'unnestedGenotype': unnested_trajectory.name,
            'scoreGreater': score_greater,
            'scoreFixed': score_fixed,
            'scoreArea': score_area,
            'scoreDerivative': score_derivative,
            'totalScore': total_score
        }