def test_scale_dataframe():
    """Check that scaled columns of different magnitude end up with equal sums.

    Column ``B`` is exactly ten times column ``A``; the test expects the
    per-column sums of ``core.scale_dataframe(df)`` to match.
    """
    import math

    df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [10, 20, 30, 40]})

    # If scaled, A and B should have the same sum.
    df_sum = list(core.scale_dataframe(df).sum())

    # Compare with a tolerance rather than ``==``: the sums are floating-point
    # results and may differ in the last bits. ``abs_tol`` is needed because a
    # relative tolerance alone is useless when both sums are close to zero.
    assert math.isclose(df_sum[0], df_sum[1], rel_tol=1e-9, abs_tol=1e-9)
def create_inter_samples_model(self):
    """Build the inter-sample model from the training samples.

    ``self.raw`` is passed through ``core.scale_dataframe`` and the result is
    kept in ``self.norm_raw``. Row-wise (``axis=1``) summary statistics of
    that frame are then stored in ``self.inter_model``, one column per
    statistic: ``mean``, ``median``, ``std``, ``min`` and ``max``.

    The mean and standard deviation are meant to be used later to compute an
    inter z-score for a new sample.
    """
    self.norm_raw = core.scale_dataframe(self.raw)
    normalized = self.norm_raw

    # Each name is both the DataFrame method to call and the output column.
    statistics = ("mean", "median", "std", "min", "max")
    self.inter_model = pd.DataFrame(
        {name: getattr(normalized, name)(axis=1) for name in statistics}
    )
def test_sample(self, bamfile: str) -> pd.DataFrame:
    """Test a new sample against the current model.

    Example::

        model = Model("model.h5")
        data = model.test_sample("sample.bam")

    The returned dataframe contains for each position:

    - depth: the raw depth
    - depth_norm: the normalized raw depth
    - inter_z: the inter-model z-score
    - depth_mate: the raw depth of the mate
    - depth_mate_predicted: the raw depth predicted by the intra-model
    - corr: the ``corr`` value taken from the intra-model
    - error: ``depth_mate_predicted - depth_mate``
    - intra_z: the intra-model z-score (``error`` divided by the
      intra-model ``std``)

    The intra-model columns are computed only for the positions listed in
    ``self.intra_model.index`` and are joined back onto the full frame.

    Args:
        bamfile (str): A sample bam file

    Returns:
        pd.DataFrame
    """
    del_coverage = core.get_coverages_from_bed([bamfile], self.bedfile)

    dd = del_coverage.copy()
    dd.columns = ["depth"]

    # Compute inter model
    dd["depth_norm"] = core.scale_dataframe(dd)["depth"]
    dd["inter_z"] = (
        dd["depth_norm"] - self.inter_model["mean"]
    ) / self.inter_model["std"]

    # Compute intra model.
    # Work on an explicit copy: ``subset`` gets new columns below and must not
    # be treated as a view of ``dd`` (avoids chained-assignment warnings).
    subset = dd.loc[self.intra_model.index].copy()

    # ``idx`` holds row positions; column 0 of ``subset`` is "depth".
    subset["depth_mate"] = (
        subset.iloc[self.intra_model["idx"], :].iloc[:, 0].values
    )
    subset["depth_mate_predicted"] = (
        self.intra_model["coef"] * subset["depth"]
    ) + self.intra_model["intercept"]
    subset["corr"] = self.intra_model["corr"]
    subset["error"] = subset["depth_mate_predicted"] - subset["depth_mate"]
    subset["intra_z"] = subset["error"] / self.intra_model["std"]

    # Drop the columns ``dd`` already has so the join does not collide on
    # column names; only the intra-model columns are added.
    intra_columns = subset.drop(["depth", "depth_norm", "inter_z"], axis=1)
    test_data = dd.join(intra_columns)

    return test_data
def test_sample(self, bamfile: str, show_progress=True) -> pd.DataFrame:
    """Score a new sample against the current model.

    Example::

        model = Model("model.h5")
        data = model.test_sample("sample.bam")

    The returned dataframe holds, for each position:

    - depth: the raw depth
    - depth_norm: the normalized raw depth
    - inter_z: the inter-model z-score
    - depth_mate: the raw depth of the mate
    - depth_mate_predicted: the raw depth predicted by the intra-model
    - error_intra: ``depth_mate_predicted - depth_mate``
    - intra_z: the intra-model z-score

    Args:
        bamfile (str): A sample bam file
        show_progress: Forwarded as-is to ``core.get_coverages_from_bed``.

    Returns:
        pd.DataFrame
    """
    coverage = core.get_coverages_from_bed(
        bamfile,
        self.bedfile,
        sample_rate=self.sample_rate,
        show_progress=show_progress,
    )

    result = coverage.copy()
    result.columns = ["depth"]

    # Inter model: z-score of the normalized depth against the model's
    # per-position mean and standard deviation.
    scaled = core.scale_dataframe(result)
    result["depth_norm"] = scaled["depth"]

    model = self.super_model
    centered = result["depth_norm"] - model["mean"]
    result["inter_z"] = centered / model["std"]

    # Intra model: look up each position's mate by row position, predict the
    # mate depth from the linear fit, and z-score the prediction error.
    mate_rows = result.iloc[model["idx"], :]
    result["depth_mate"] = mate_rows["depth"].to_list()

    predicted = (model["coef"] * result["depth"]) + model["intercept"]
    result["depth_mate_predicted"] = predicted

    result["error_intra"] = (
        result["depth_mate_predicted"] - result["depth_mate"]
    )
    result["intra_z"] = result["error_intra"] / model["std2"]

    return result