def test_result_label_missmatch_with_res_corr(self):
    """Mismatched labels give a non-zero probability with correction enabled.

    The first argument of ``ResolutionCorrectionInfo`` (``True``) switches
    the resolution correction on.
    """
    info = self.molecule_info2
    source = ip.Label("2N15 4C13", info)
    target = ip.Label("1N15 6C13", info)
    correction = rc.ResolutionCorrectionInfo(True, 60000, 200, info)
    self.assertAlmostEqual(
        ic.calc_transition_prob(source, target, correction),
        0.1794381,
    )
def test_result_label_missmatch_without_res_corr(self):
    """Mismatched labels give exactly zero with correction disabled.

    The first argument of ``ResolutionCorrectionInfo`` (``False``) switches
    the resolution correction off.
    """
    info = self.molecule_info2
    source = ip.Label("2N15 4C13", info)
    target = ip.Label("1N15 6C13", info)
    correction = rc.ResolutionCorrectionInfo(False, 60000, 200, info)
    self.assertEqual(ic.calc_transition_prob(source, target, correction), 0)
def test_init_return_types(self):
    """Check the type of every attribute set on ResolutionCorrectionInfo."""
    inst = rc.ResolutionCorrectionInfo(True, 100000, 600, self.molecule_info)
    # Checked in this order; the first mismatch fails the test.
    expected_types = (
        ("do_correction", bool),
        ("resolution", Number),
        ("mz_calibration", Number),
        ("molecule_info", MoleculeInfo),
        ("molecule_mass", Number),
        ("min_mass_diff", Number),
    )
    for attribute, kind in expected_types:
        self.assertIsInstance(getattr(inst, attribute), kind)
def test_generate_labels_error(self):
    """Count the labels generated for the small molecule C2H8O1.

    Despite the test name, no exception is asserted here: the test only
    checks that ``generate_labels(3, ...)`` yields nine labels.
    NOTE(review): presumably labels too large for the molecule are dropped
    by the generator instead of raising -- confirm against ``generate_labels``.
    """
    small_molecule = MoleculeInfo.get_molecule_info(
        molecule_formula="C2H8O1",
        molecule_charge=1,
        molecules_file=self.molecules_file,
        isotopes_file=self.isotopes_file,
    )
    correction = rc.ResolutionCorrectionInfo(False, 60000, 200, small_molecule)
    generated = list(rc.generate_labels(3, correction))
    self.assertEqual(len(generated), 9)
def test_indirect_overlap_warn(self):
    """Warn for every label set that overlaps indirectly.

    Each label set runs in its own sub-test, identified by its raw label
    strings, so a failure report names the offending case.
    """
    res_corr_info_high = rc.ResolutionCorrectionInfo(
        False, 200000, 200, self.molecule_info
    )
    raw_label_sets = [
        ["3H02", "4C13"],
        ["1H02 1C13", "5C13"],
        ["2N15 1H02", "2N15 3C13"],
    ]
    for raw_labels in raw_label_sets:
        # Passing the raw strings gives each sub-test a readable identifier.
        with self.subTest(labels=raw_labels):
            labels = LabelTuple(raw_labels, self.molecule_info)
            with self.assertWarns(UserWarning):
                rc.warn_indirect_overlap(labels, res_corr_info_high)
class TestGenerateLabels(unittest.TestCase):
    """Tests for ``rc.generate_labels``."""

    # Fixtures shared by all tests; built once when the class is defined.
    molecules_file = Path("tests/test_metabolites.csv")
    isotopes_file = Path("tests/test_isotopes.csv")
    molecule_info = MoleculeInfo.get_molecule_info(
        molecule_name="Test1",
        molecules_file=molecules_file,
        isotopes_file=isotopes_file,
    )
    res_corr_info = rc.ResolutionCorrectionInfo(False, 60000, 200, molecule_info)

    def test_generate_labels_res(self):
        """Return the expected labels, in order, for a small example."""
        expected_texts = (
            "1O18",
            "2O17",
            "1N151O17",
            "2N15",
            "1H021O17",
            "1H021N15",
            "2H02",
            "1C131O17",
            "1C131N15",
            "1C131H02",
            "2C13",
        )
        expected = [Label(text, self.molecule_info) for text in expected_texts]
        generated = list(rc.generate_labels(2, self.res_corr_info))
        self.assertListEqual(generated, expected)

    def test_generate_labels_error(self):
        """Count the labels generated for the small molecule C2H8O1.

        Despite the test name, no exception is asserted here: the test only
        checks that ``generate_labels(3, ...)`` yields nine labels.
        NOTE(review): presumably labels too large for the molecule are
        dropped by the generator instead of raising -- confirm.
        """
        small_molecule = MoleculeInfo.get_molecule_info(
            molecule_formula="C2H8O1",
            molecule_charge=1,
            molecules_file=self.molecules_file,
            isotopes_file=self.isotopes_file,
        )
        correction = rc.ResolutionCorrectionInfo(False, 60000, 200, small_molecule)
        generated = list(rc.generate_labels(3, correction))
        self.assertEqual(len(generated), 9)
class TestOverlapWarnings(unittest.TestCase):
    """Tests for the direct and indirect overlap warnings."""

    # Fixtures shared by all tests; built once when the class is defined.
    molecules_file = Path("tests/test_metabolites.csv")
    isotopes_file = Path("tests/test_isotopes.csv")
    molecule_info = MoleculeInfo.get_molecule_info(
        molecule_name="Test1",
        molecules_file=molecules_file,
        isotopes_file=isotopes_file,
    )
    res_corr_info = rc.ResolutionCorrectionInfo(False, 60000, 200, molecule_info)

    def test_direct_overlap_warn(self):
        """Warn when the labels overlap directly."""
        with self.assertWarns(UserWarning):
            rc.warn_direct_overlap(
                LabelTuple(["4H02", "4C13"], self.molecule_info), self.res_corr_info
            )

    def test_direct_overlap_not_warn(self):
        """Emit no warning when the labels do not overlap."""
        # ``pytest.warns(None)`` is deprecated in pytest 7 and raises in
        # pytest 8, so the warnings are recorded with the standard library.
        import warnings

        with warnings.catch_warnings(record=True) as caught:
            # Record everything, regardless of filters installed elsewhere.
            warnings.simplefilter("always")
            rc.warn_direct_overlap(
                LabelTuple(["3H02", "4C13"], self.molecule_info), self.res_corr_info
            )
        self.assertFalse(
            caught, f"unexpected warnings: {[str(w.message) for w in caught]}"
        )

    def test_indirect_overlap_warn(self):
        """Warn for every label set that overlaps indirectly.

        Each label set runs in its own sub-test, identified by its raw label
        strings, so a failure report names the offending case.
        """
        res_corr_info_high = rc.ResolutionCorrectionInfo(
            False, 200000, 200, self.molecule_info
        )
        raw_label_sets = [
            ["3H02", "4C13"],
            ["1H02 1C13", "5C13"],
            ["2N15 1H02", "2N15 3C13"],
        ]
        for raw_labels in raw_label_sets:
            with self.subTest(labels=raw_labels):
                labels = LabelTuple(raw_labels, self.molecule_info)
                with self.assertWarns(UserWarning):
                    rc.warn_indirect_overlap(labels, res_corr_info_high)
class TestMassCalculations(unittest.TestCase):
    """Molecule mass and minimum mass difference."""

    # Fixtures shared by all tests; built once when the class is defined.
    molecules_file = Path("tests/test_metabolites.csv")
    isotopes_file = Path("tests/test_isotopes.csv")
    molecule_info = MoleculeInfo.get_molecule_info(
        molecule_name="Test1",
        molecules_file=molecules_file,
        isotopes_file=isotopes_file,
    )
    res_corr_info = rc.ResolutionCorrectionInfo(False, 60000, 200, molecule_info)

    def test_calc_min_mass_diff_result(self):
        """Minimal mass difference for valid input."""
        res = rc.ResolutionCorrectionInfo.calc_min_mass_diff(680, 2, 200, 50000)
        self.assertAlmostEqual(res, 0.029436, places=6)

    def test_calc_min_mass_diff_negative_mass(self):
        """ValueError for a negative first argument (-680)."""
        with self.assertRaises(ValueError):
            rc.ResolutionCorrectionInfo.calc_min_mass_diff(-680, 2, 200, 50000)

    def test_is_overlap_true(self):
        """Overlapping isotopologues."""
        res = rc.is_isotologue_overlap(
            Label("5C13", self.molecule_info),
            Label("4C13 1H02", self.molecule_info),
            self.res_corr_info,
        )
        self.assertTrue(res)

    def test_is_overlap_false(self):
        """Non-overlapping isotopologues."""
        res = rc.is_isotologue_overlap(
            Label("14C13", self.molecule_info),
            Label("14H02", self.molecule_info),
            self.res_corr_info,
        )
        self.assertFalse(res)

    def test_coarse_mass_difference_result(self):
        """Difference in nucleons."""
        res = rc.calc_coarse_mass_difference(
            Label("No label", self.molecule_info),
            Label("5C13 3N15 2H02", self.molecule_info),
        )
        self.assertEqual(res, 10)

    def test_coarse_mass_difference_bad_type(self):
        """TypeError when either argument is not a Label."""
        label = Label("No label", self.molecule_info)
        non_label = {"C13": 12}
        label_list = [(label, non_label), (non_label, label), (non_label, non_label)]
        for la1, la2 in label_list:
            # Type names identify which combination failed in the report.
            with self.subTest(la1=type(la1).__name__, la2=type(la2).__name__):
                with self.assertRaises(TypeError):
                    rc.calc_coarse_mass_difference(la1, la2)

    def test_fwhm_result(self):
        """Result with valid input."""
        mz_cal, mz, resolution = 200, 500, 50_000
        res = rc.ResolutionCorrectionInfo.fwhm(mz_cal, mz, resolution)
        self.assertAlmostEqual(res, 0.01581139)

    def test_fwhm_bad_input(self):
        """ValueError whenever at least one argument is negative."""
        for mz_cal, mz, resolution in itertools.product([200, -200], repeat=3):
            if all(ele > 0 for ele in [mz_cal, mz, resolution]):
                # skip case in which all are positive
                continue
            with self.subTest(mz_cal=mz_cal, mz=mz, resolution=resolution):
                with self.assertRaises(ValueError):
                    rc.ResolutionCorrectionInfo.fwhm(mz_cal, mz, resolution)
class TestTransitionProbability(unittest.TestCase):
    """Calculation of the transition probability between two isotopologues."""

    # Fixtures shared by all tests; built once when the class is defined.
    molecules_file = Path("tests/test_metabolites.csv")
    isotopes_file = Path("tests/test_isotopes.csv")
    molecule_info = ip.MoleculeInfo.get_molecule_info(
        molecule_name="Test4",
        molecules_file=molecules_file,
        isotopes_file=isotopes_file,
    )
    molecule_info2 = ip.MoleculeInfo.get_molecule_info(
        molecule_name="Test1",
        molecules_file=molecules_file,
        isotopes_file=isotopes_file,
    )
    res_corr_info = rc.ResolutionCorrectionInfo(False, 60000, 200, molecule_info)

    def test_result_label1_smaller(self):
        """Non-zero probability when label1 is smaller than label2."""
        smaller = ip.Label("2N15", self.molecule_info)
        larger = ip.Label("2N152C13", self.molecule_info)
        self.assertAlmostEqual(
            ic.calc_transition_prob(smaller, larger, self.res_corr_info),
            0.03685030,
        )

    def test_result_label1_equal(self):
        """Zero probability when both arguments are the same label."""
        label = ip.Label("1N15", self.molecule_info)
        self.assertEqual(
            ic.calc_transition_prob(label, label, self.res_corr_info), 0
        )

    def test_result_label_missmatch_without_res_corr(self):
        """Mismatched labels give exactly zero with correction disabled."""
        info = self.molecule_info2
        source = ip.Label("2N15 4C13", info)
        target = ip.Label("1N15 6C13", info)
        correction = rc.ResolutionCorrectionInfo(False, 60000, 200, info)
        self.assertEqual(ic.calc_transition_prob(source, target, correction), 0)

    def test_result_label_missmatch_with_res_corr(self):
        """Mismatched labels give a non-zero probability with correction enabled."""
        info = self.molecule_info2
        source = ip.Label("2N15 4C13", info)
        target = ip.Label("1N15 6C13", info)
        correction = rc.ResolutionCorrectionInfo(True, 60000, 200, info)
        self.assertAlmostEqual(
            ic.calc_transition_prob(source, target, correction),
            0.1794381,
        )

    def test_result_label1_larger(self):
        """Zero probability when label1 is larger than label2."""
        larger = ip.Label("2N152C13", self.molecule_info)
        smaller = ip.Label("2N15", self.molecule_info)
        self.assertEqual(
            ic.calc_transition_prob(larger, smaller, self.res_corr_info), 0
        )

    def test_result_molecule_formula(self):
        """Result for labels built on a freshly looked-up "Test1" molecule.

        The labels use the locally created molecule info, while the
        resolution correction info is the class-level one (built from
        "Test4").
        """
        local_info = ip.MoleculeInfo.get_molecule_info(
            molecule_name="Test1",
            molecules_file=self.molecules_file,
            isotopes_file=self.isotopes_file,
        )
        source = ip.Label("1N15", local_info)
        target = ip.Label("2N152C13", local_info)
        self.assertAlmostEqual(
            ic.calc_transition_prob(source, target, self.res_corr_info),
            0.00042029,
        )

    def test_wrong_type(self):
        """TypeError when a dict is passed in place of a molecule."""
        source = ip.Label("1N15", self.molecule_info)
        target = ip.Label("2N152C13", self.molecule_info)
        bad_molecule = {"C": 12, "N": 15}
        # NOTE(review): every other call in this class passes three
        # arguments; this one passes four. If calc_transition_prob only
        # accepts three, the TypeError comes from the argument count and the
        # dict is never inspected -- confirm against the function signature.
        with self.assertRaises(TypeError):
            ic.calc_transition_prob(source, target, bad_molecule, self.res_corr_info)
def calc_isotopologue_correction(
    raw_data,
    molecule_name=None,
    molecules_file=None,
    molecule_formula=None,
    molecule_charge=None,
    subset=False,
    exclude_col=False,
    resolution_correction=False,
    mz_calibration=200,
    resolution=60000,
    isotopes_file=None,
    logging_level="WARNING",
):
    """Calculate isotopologue correction factor for molecule.

    Takes pandas DataFrame and calculates isotopologue
    correction for molecule in molecules file,
    returns DataFrame with corrected values.
    Note (from the original documentation; verify against the label parser):
    only C13 and N15 are supported as column labels right now, e.g. 5C13.

    Parameters
    ----------
    raw_data : pandas.DataFrame
        DataFrame of integrated lowest peaks per species vs time
    molecule_name : str
        Molecule name as in molecules_file.
    molecules_file : str or Path
        tab-separated file with name, formula and charge as rows
        e.g. Suc C4H4O3 -1
        Defaults to ``metabolites.csv`` next to this module.
    molecule_formula : str
        Chemical formula as string.
        No spaces or underscores allowed.
        E.g. "C3H7O1"
    molecule_charge : int
        Charge as signed integer
    subset : list of str or False, optional
        List of column names to use for calculation.
        Any falsy value selects all columns of ``raw_data``.
    exclude_col : list of str, optional
        Columns to ignore in calculation
    resolution_correction : bool, optional
        Run additional correction for isotopologues overlapping
        due to low resolution. For example H02 and C13
    mz_calibration : float, optional
        mass-charge ratio of calibration point
    resolution : float, optional
        Resolution at calibration mz
    isotopes_file : Path or str, optional
        tab-separated file with element, mass, abundance and isotope as rows
        e.g. H 1.008 0.99 H01
        Defaults to ``isotopes.csv`` next to this module.
    logging_level : str
        Logging is output to stderr
        Possible levels: "DEBUG", "INFO", "WARNING", "CRITICAL"
        Default level: "WARNING"
        The ``LOGLEVEL`` environment variable, when set, takes precedence.

    Returns
    -------
    pandas.DataFrame
        Corrected data

    """
    logging.basicConfig(level=os.environ.get("LOGLEVEL", logging_level))

    # Fall back to the data files shipped alongside this module.
    dir_path = Path(__file__).parent
    if not isotopes_file:
        isotopes_file = dir_path / "isotopes.csv"
    if not molecules_file:
        molecules_file = dir_path / "metabolites.csv"

    if not subset:
        subset = raw_data.columns
    if exclude_col:
        # Filter in place of a set difference: a set would scramble the
        # column order nondeterministically. ``dict.fromkeys`` keeps the
        # de-duplication the set difference provided.
        excluded = set(exclude_col)
        subset = [col for col in dict.fromkeys(subset) if col not in excluded]

    molecule_info = ip.MoleculeInfo.get_molecule_info(
        molecule_name,
        molecules_file,
        molecule_formula,
        molecule_charge,
        isotopes_file,
    )
    subset = ip.LabelTuple(subset, molecule_info)
    res_corr_info = rc.ResolutionCorrectionInfo(
        resolution_correction, resolution, mz_calibration, molecule_info
    )
    # Direct-overlap warnings are only checked when the correction is on.
    if res_corr_info.do_correction:
        rc.warn_direct_overlap(subset, res_corr_info)

    return correct_data(raw_data, subset, res_corr_info)