Пример #1
0
def test_scale_dataframe():
    """Check that ``core.scale_dataframe`` gives columns A and B equal sums."""
    frame = pd.DataFrame({"A": [1, 2, 3, 4], "B": [10, 20, 30, 40]})

    # Once scaled, columns A and B are expected to sum to the same value.
    scaled = core.scale_dataframe(frame)
    column_totals = list(scaled.sum())

    assert column_totals[0] == column_totals[1]
Пример #2
0
 def create_inter_samples_model(self):
     """Build the inter-sample model.

     ``self.raw`` is scaled with ``core.scale_dataframe`` and kept in
     ``self.norm_raw``. Row-wise summary statistics (mean, median, std,
     min, max) of the scaled data are then stored in ``self.inter_model``
     so that an inter z-score can later be computed for a new sample.
     """
     self.norm_raw = core.scale_dataframe(self.raw)
     scaled = self.norm_raw

     # Each name is both the DataFrame method to call and the output column.
     stat_names = ("mean", "median", "std", "min", "max")
     self.inter_model = pd.DataFrame(
         {stat: getattr(scaled, stat)(axis=1) for stat in stat_names}
     )
Пример #3
0
    def test_sample(self, bamfile: str) -> pd.DataFrame:
        """Test a new sample against the current model.

        Example:
            model = Model("model.h5")
            data = model.test_sample("sample.bam")

        The returned dataframe contains for each position:
            - depth: the raw depth
            - depth_norm: the normalized raw depth
            - inter_z: the inter-model z-score
            - depth_mate: the raw depth of the mate
            - depth_mate_predicted: the raw depth predicted by the intra-model
            - corr: the ``corr`` value taken from the intra-model
            - error: ``depth_mate_predicted`` minus ``depth_mate``
            - intra_z: the intra-model z-score (``error`` / intra-model std)

        The intra-model columns are computed only for positions present in
        the intra-model index; they are left-joined onto the full table, so
        other positions have missing values in those columns.

        Args:
            bamfile (str): A sample bam file

        Returns:
            pd.DataFrame
        """
        coverage = core.get_coverages_from_bed([bamfile], self.bedfile)

        dd = coverage.copy()
        dd.columns = ["depth"]

        # Compute inter model
        dd["depth_norm"] = core.scale_dataframe(dd)["depth"]
        dd["inter_z"] = (dd["depth_norm"] -
                         self.inter_model["mean"]) / self.inter_model["std"]

        # Compute intra model
        # Work on an explicit copy so the column assignments below can never
        # write into (or raise a chained-assignment warning about) ``dd``.
        subset = dd.loc[self.intra_model.index].copy()

        # ``idx`` is used positionally (iloc) within ``subset``; column 0 is
        # "depth", so this picks the raw depth of each position's mate.
        subset["depth_mate"] = subset.iloc[
            self.intra_model["idx"], :].iloc[:, 0].values
        subset["depth_mate_predicted"] = (
            self.intra_model["coef"] *
            subset["depth"]) + self.intra_model["intercept"]
        subset["corr"] = self.intra_model["corr"]
        subset["error"] = subset["depth_mate_predicted"] - subset["depth_mate"]
        subset["intra_z"] = subset["error"] / self.intra_model["std"]

        # Drop the columns already present in ``dd`` before joining, otherwise
        # the join would fail on overlapping column names.
        intra_columns = subset.drop(["depth", "depth_norm", "inter_z"], axis=1)
        test_data = dd.join(intra_columns)

        return test_data
Пример #4
0
    def test_sample(self, bamfile: str, show_progress=True) -> pd.DataFrame:
        """Score a new sample against the current model.

        Example:
            model = Model("model.h5")
            data = model.test_sample("sample.bam")

        The returned dataframe contains for each position:
            - depth: the raw depth
            - depth_norm: the normalized raw depth
            - inter_z: the inter-model z-score
            - depth_mate: the raw depth of the mate
            - depth_mate_predicted: the raw depth predicted by the intra-model
            - error_intra: ``depth_mate_predicted`` minus ``depth_mate``
            - intra_z: the intra-model z-score

        Args:
            bamfile (str): A sample bam file
            show_progress: Forwarded to ``core.get_coverages_from_bed``.

        Returns:
            pd.DataFrame
        """
        coverage = core.get_coverages_from_bed(
            bamfile,
            self.bedfile,
            sample_rate=self.sample_rate,
            show_progress=show_progress,
        )
        result = coverage.copy()
        result.columns = ["depth"]

        model = self.super_model

        # Inter model: z-score of the normalized depth.
        normalized = core.scale_dataframe(result)
        result["depth_norm"] = normalized["depth"]
        centered = result["depth_norm"] - model["mean"]
        result["inter_z"] = centered / model["std"]

        # Intra model: look up each position's mate by integer position, then
        # compare its depth with the linear prediction from this position.
        mate_rows = result.iloc[model["idx"], :]
        result["depth_mate"] = mate_rows["depth"].to_list()
        scaled_depth = model["coef"] * result["depth"]
        result["depth_mate_predicted"] = scaled_depth + model["intercept"]
        result["error_intra"] = (
            result["depth_mate_predicted"] - result["depth_mate"]
        )
        result["intra_z"] = result["error_intra"] / model["std2"]

        return result