def test_metrics_features_threshold_error(self):
    """Expect ``metrics`` to reject an unreachable feature threshold.

    Filtering is requested with ``Features_thresh=50`` on the
    ``mtx_df_50x40`` fixture; the call must raise ``ValueError`` whose
    message matches the "Feature threshold too high" pattern.
    """
    expected_pattern = r"Feature threshold too high, all samples would be removed."
    filter_settings = dict(
        filter_count_matrix=True,
        remove_cell_cycle=True,
        UMI_thresh=1500,
        Features_thresh=50,
        log10FeaturesPerUMI_thresh=0.002,
        FeaturesPerUMI_thresh=0.0001,
        mtRatio_thresh=0.5,
    )
    quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)

    # The call itself must raise, so its return value is never bound.
    with pytest.raises(ValueError, match=expected_pattern):
        quality_control.metrics(**filter_settings)
def test_metrics_with_filter_DATAFRAME(self):
    """Run ``metrics`` with filtering on the DataFrame fixture and check the output.

    Verifies the QC metadata table (shape, column names, no missing values)
    and the filtered outputs: matrix type and shape, total count, and that
    the surviving barcodes and genes all belong to the expected sets.
    """
    expected_columns = [
        'nUMI', 'nFeatures', 'FeaturesPerUMI',
        'log10FeaturesPerUMI', 'mtUMI', 'mitoRatio',
    ]
    expected_barcodes = {
        'AAACATTGAGCTAC-1', 'AAACATACAACCAC-1', 'AAACATTGATCAGC-1',
        'AAACCGTGTATGCG-1', 'AAACGCACTGGTAC-1', 'AAACGCTGACCAGT-1',
        'AAACTTGATCCAGA-1', 'AAAGAGACGCGAGA-1', 'AAAGAGACGGCATT-1',
        'AAAGCAGAAGCCAT-1', 'AAAGCAGATATCGG-1', 'AAAGCCTGTATGCG-1',
        'AAAGTTTGATCACG-1', 'AAAGTTTGGGGTGA-1', 'AAAGTTTGTAGAGA-1',
        'AAAGTTTGTAGCGT-1', 'AAATCAACCCTATT-1', 'AAATCAACGGAAGC-1',
        'AAATCAACTCGCAA-1', 'AAATCCCTCCACAA-1', 'AAATCCCTGCTATG-1',
        'AAATGTTGAACGAA-1', 'AAATGTTGCCACAA-1', 'AAATGTTGTGGCAT-1',
        'AAATTCGAAGGTTC-1', 'AAATTCGAGCTGAT-1', 'AAACATTGATCTAC-1',
        'AAACATTGATCCGC-1', 'ATACGCACTGGTAC-1', 'AAACGCTGATCAGT-1',
        'AAACGCGGGTTCTT-1', 'AAAAGCTGTAGCCA-1',
    }
    expected_genes = {
        'ENSG00000243485', 'ENSG00000237613', 'ENSG00000186092',
        'ENSG00000238009', 'ENSG00000239945', 'ENSG00000237683',
        'ENSG00000239906', 'ENSG00000241599', 'ENSG00000228463',
        'ENSG00000237094', 'ENSG00000235249', 'ENSG00000236601',
        'ENSG00000236743', 'ENSG00000231709', 'ENSG00000239664',
        'ENSG00000230021', 'ENSG00000223659', 'ENSG00000185097',
        'ENSG00000235373', 'ENSG00000240618', 'ENSG00000229905',
        'ENSG00000010292', 'ENSG00000011426', 'ENSG00000129055',
        'ENSG00000177757', 'ENSG00000225880', 'ENSG00000230368',
        'ENSG00000269308', 'ENSG00000272438', 'ENSG00000230699',
        'ENSG00000210049', 'ENSG00000211459', 'ENSG00000097007',
        'ENSG00000210082', 'ENSG00000241180', 'ENSG00000223764',
        'ENSG00000187634', 'ENSG00000268179', 'ENSG00000188976',
        'ENSG00000187961',
    }

    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    fdata, fgenes, fbc, QC_metaobj_50x40 = qc.metrics(
        UMI_thresh=1500,
        Features_thresh=39,
        log10FeaturesPerUMI_thresh=0.002,
        FeaturesPerUMI_thresh=0.0001,
        mtRatio_thresh=0.5,
        filter_count_matrix=True,
        remove_cell_cycle=False,
    )

    # QC metadata: one row per input row, six metric columns, no NaNs.
    assert QC_metaobj_50x40.shape == (len(self.mtx_df_50x40), 6)
    assert np.all(QC_metaobj_50x40.columns == expected_columns)
    assert not np.any(QC_metaobj_50x40.isna())

    # Use the public class path; the `scipy.sparse.csc` submodule namespace
    # is deprecated in recent SciPy releases.
    assert isinstance(fdata, spsp.csc_matrix)
    assert isinstance(fbc, list)
    assert isinstance(fgenes, list)

    assert len(fbc) == 32
    assert len(fgenes) == 40
    assert fdata.shape == (32, 40)
    assert fdata.sum() == 63358

    # Every surviving label must be one of the expected ones; together with
    # the length checks above this pins the filtered barcode/gene sets.
    assert not set(fbc).difference(expected_barcodes)
    assert not set(fgenes).difference(expected_genes)
def test_log10FeaturesPerUMI_thresh(self):
    """Compare the two filtering entry points for one ``log10FeaturesPerUMI_thresh``.

    Filtering directly through ``filter_count_matrix`` and filtering through
    ``metrics(filter_count_matrix=True)`` must yield matrices of equal shape.
    """
    threshold = .48

    # Path 1: call the filter directly (returns matrix, genes, barcodes).
    direct_qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    direct_matrix, _direct_genes, _direct_barcodes = direct_qc.filter_count_matrix(
        log10FeaturesPerUMI_thresh=threshold
    )

    # Path 2: a fresh instance, filtering via metrics (also returns the QC table).
    metrics_qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    metrics_matrix, _metrics_genes, _metrics_barcodes, _qc_table = metrics_qc.metrics(
        filter_count_matrix=True,
        log10FeaturesPerUMI_thresh=threshold,
    )

    assert direct_matrix.shape == metrics_matrix.shape
def test_metrics_no_filter_CSC(self):
    """Compute QC metrics from the CSC fixture without filtering.

    Checks the metadata table's shape, column names, absence of missing
    values, and that the grand total of all metric values is approximately
    the recorded reference value.
    """
    metric_names = [
        'nUMI', 'nFeatures', 'FeaturesPerUMI',
        'log10FeaturesPerUMI', 'mtUMI', 'mitoRatio',
    ]
    quality_control = QualityControl(self.csc_50x40, self.genes, self.barcodes)

    # No thresholds are passed: filtering is switched off for this call.
    qc_table = quality_control.metrics(filter_count_matrix=False)

    n_rows = self.csc_50x40.shape[0]
    assert qc_table.shape == (n_rows, 6)
    assert np.all(qc_table.columns == metric_names)
    assert not np.any(qc_table.isna())

    # Built-in sum over the 2-D values adds the rows together, giving
    # per-column totals; the second sum folds those into one number.
    per_column_totals = sum(qc_table.values)
    grand_total = sum(per_column_totals)
    np.testing.assert_approx_equal(
        grand_total,
        107641.13463,
        significant=4,
        err_msg='metrics sum incorrect',
    )
def test_metrics_no_filter_DATAFRAME(self):
    """Compute QC metrics from the DataFrame fixture without filtering.

    Thresholds are supplied but ``filter_count_matrix=False``, so only the
    QC metadata table is returned. Checks its shape, column names, absence
    of missing values, and the grand total of all metric values.
    """
    expected_columns = [
        'nUMI', 'nFeatures', 'FeaturesPerUMI',
        'log10FeaturesPerUMI', 'mtUMI', 'mitoRatio',
    ]
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    QC_metaobj_50x40 = qc.metrics(
        UMI_thresh=1500,
        Features_thresh=39,
        log10FeaturesPerUMI_thresh=0.002,
        FeaturesPerUMI_thresh=0.0001,
        mtRatio_thresh=0.5,
        filter_count_matrix=False,
    )

    assert QC_metaobj_50x40.shape == (len(self.mtx_df_50x40), 6)
    assert np.all(QC_metaobj_50x40.columns == expected_columns)
    assert not np.any(QC_metaobj_50x40.isna())

    total = sum(sum(QC_metaobj_50x40.values))

    # Exact `==` on a floating-point sum is brittle (summation order and
    # library versions can change the last bits), so compare with a tight
    # relative tolerance instead.
    np.testing.assert_allclose(
        total,
        107641.1346368048,
        rtol=1e-9,
        err_msg='metrics sum incorrect',
    )
    # Coarse check kept in the same form as the CSC variant of this test.
    np.testing.assert_approx_equal(
        total,
        107641.13463,
        significant=4,
        err_msg='metrics sum incorrect',
    )
def test_filter_both(self):
    """Filtering with a precomputed QC table must match filtering without one.

    One instance computes the QC table via ``metrics`` and passes it to
    ``filter_count_matrix``; a second instance calls ``filter_count_matrix``
    with ``QC_metaobj=None``. Matrix shape, barcodes and genes must agree.
    """
    shared_settings = dict(
        remove_cell_cycle=False,
        UMI_thresh=1500,
        Features_thresh=39,
        log10FeaturesPerUMI_thresh=0.002,
        FeaturesPerUMI_thresh=0.0001,
        mtRatio_thresh=0.5,
    )

    # Variant A: QC table computed up front and handed to the filter.
    with_table = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    qc_table = with_table.metrics(filter_count_matrix=False)
    matrix_a, genes_a, barcodes_a = with_table.filter_count_matrix(
        qc_table, **shared_settings
    )

    # Variant B: no QC table supplied.
    without_table = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    matrix_b, genes_b, barcodes_b = without_table.filter_count_matrix(
        QC_metaobj=None, **shared_settings
    )

    assert matrix_a.shape == matrix_b.shape
    assert np.all(barcodes_a == barcodes_b)
    assert np.all(genes_a == genes_b)
def test_metrics_with_filter_df_verbose(self):
    """Smoke test: ``metrics`` with filtering and ``verbose=True`` completes.

    No values are asserted; the test only requires that the call returns
    four items without raising.
    """
    verbose_settings = dict(
        UMI_thresh=1500,
        Features_thresh=39,
        log10FeaturesPerUMI_thresh=0.002,
        FeaturesPerUMI_thresh=0.0001,
        mtRatio_thresh=0.5,
        filter_count_matrix=True,
        remove_cell_cycle=False,
        verbose=True,
    )
    quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)

    # The 4-way unpack is the only implicit check on the return value.
    _matrix, _genes, _barcodes, _qc_table = quality_control.metrics(**verbose_settings)
def test_filter_with_QC_Obj(self):
    """Filter using a QC table obtained from a prior ``metrics`` call.

    The QC table is computed without filtering and then passed to
    ``filter_count_matrix`` (cell-cycle removal off). Checks the output
    types, the filtered matrix shape and its total count.
    """
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    QC_metaobj_50x40 = qc.metrics(filter_count_matrix=False)
    fdata, fgenes, fbc = qc.filter_count_matrix(
        QC_metaobj_50x40,
        remove_cell_cycle=False,
        UMI_thresh=1500,
        Features_thresh=39,
        log10FeaturesPerUMI_thresh=0.002,
        FeaturesPerUMI_thresh=0.0001,
        mtRatio_thresh=0.5,
    )

    # Use the public class path; the `scipy.sparse.csc` submodule namespace
    # is deprecated in recent SciPy releases.
    assert isinstance(fdata, spsp.csc_matrix)
    assert isinstance(fbc, list)
    assert isinstance(fgenes, list)
    assert np.shape(fdata) == (32, 40)
    assert fdata.sum() == 63358
def test_filter_with_QC_Obj_removeCC(self):
    """Filter with a precomputed QC table and cell-cycle removal enabled.

    ``metrics`` is called (without filtering) to obtain the QC table, which
    is then passed to ``filter_count_matrix`` with ``remove_cell_cycle=True``.
    Checks the output types, the filtered matrix shape and its total count.
    """
    thresholds = dict(
        UMI_thresh=1500,
        Features_thresh=39,
        log10FeaturesPerUMI_thresh=0.002,
        FeaturesPerUMI_thresh=0.0001,
        mtRatio_thresh=0.5,
    )
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    QC_metaobj_50x40 = qc.metrics(filter_count_matrix=False, **thresholds)
    fdata, fgenes, fbc = qc.filter_count_matrix(
        QC_metaobj_50x40,
        remove_cell_cycle=True,
        **thresholds
    )

    # Use the public class path; the `scipy.sparse.csc` submodule namespace
    # is deprecated in recent SciPy releases.
    assert isinstance(fdata, spsp.csc_matrix)
    assert isinstance(fbc, list)
    assert isinstance(fgenes, list)
    # Expected result has 38 columns here, versus 40 in the tests that
    # leave cell-cycle removal off.
    assert np.shape(fdata) == (32, 38)
    assert fdata.sum() == 60257
def test_metrics_filter_no_args(self):
    """Expect ``ValueError`` when filtering is requested without any thresholds."""
    quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)

    # The call itself must raise, so its return value is never bound.
    with pytest.raises(ValueError):
        quality_control.metrics(filter_count_matrix=True)