def background(list_of_replicates, input_fragment_value,
               unlabeled_fragment_value, isotope_dict):
    """
    Compute one background value per replicate group and assign it to
    every replicate of that group.

    For each replicate, a noise value is obtained from the unlabeled
    fragment's intensity via ``background_noise`` and passed, together with
    the input fragment's intensity, to ``background_subtraction``. The
    largest result within a replicate group is used for all its members.

    Args:
        list_of_replicates: iterable of replicate groups; each group is an
            iterable of keys present in both fragments' ``data``.
        input_fragment_value: object exposing ``frag`` (a parent/daughter
            pair) and ``data`` (intensity per replicate).
        unlabeled_fragment_value: object exposing ``data`` (intensity per
            replicate) for the unlabeled fragment.
        isotope_dict: passed through to ``helpers.get_isotope_na``.

    Returns:
        dict mapping each replicate to its group's maximum background value.
    """
    parent_frag, daughter_frag = input_fragment_value.frag
    tracer = parent_frag.isotracer
    iso_elem = helpers.get_isotope_element(tracer)
    parent_label = parent_frag.get_num_labeled_atoms_isotope(tracer)
    parent_atoms = parent_frag.number_of_atoms(iso_elem)
    na = helpers.get_isotope_na(tracer, isotope_dict)
    daughter_atoms = daughter_frag.number_of_atoms(iso_elem)
    daughter_label = daughter_frag.get_num_labeled_atoms_isotope(tracer)

    def _subtracted(replicate):
        # Noise is derived from the unlabeled intensity first, then removed
        # from the measured intensity of the same replicate.
        noise = background_noise(
            unlabeled_fragment_value.data[replicate], na, parent_atoms,
            parent_label, daughter_atoms, daughter_label)
        return background_subtraction(
            input_fragment_value.data[replicate], noise)

    replicate_value = {}
    for replicate_group in list_of_replicates:
        group_max = max([_subtracted(rep) for rep in replicate_group])
        replicate_value.update(dict.fromkeys(replicate_group, group_max))
    return replicate_value
def na_correction_mimosa_by_fragment(fragments_dict, isotope_dict, decimals):
    """
    Apply the MIMOSA natural-abundance correction to every fragment.

    The fragments are first re-keyed with ``change_fragment_keys_to_mass``.
    For a fragment keyed ``(m, n)`` the correction of each sample uses the
    sample's intensity in this fragment together with its intensities in the
    fragments keyed ``(m - 1, n)`` and ``(m - 1, n - 1)``; a neighbour that
    is absent (or lacks the sample) contributes ``0``.

    Args:
        fragments_dict: fragments to correct, in the form accepted by
            ``change_fragment_keys_to_mass``.
        isotope_dict: passed through to ``helpers.get_isotope_na``.
        decimals: passed through to ``na_correct_mimosa_algo_array``.

    Returns:
        dict with the same mass keys, each mapped to a new ``Infopacket``
        carrying the original ``frag``, ``unlabeled`` and ``name`` and the
        corrected per-sample data.
    """
    fragment_dict_mass = change_fragment_keys_to_mass(fragments_dict)

    def _intensity_or_zero(mass_key, sample_name):
        # A missing neighbour fragment, or a sample missing from it, is
        # treated as zero intensity.
        try:
            return fragment_dict_mass[mass_key].data[sample_name]
        except KeyError:
            return 0

    corrected_dict_mass = {}
    # ``.items()`` instead of ``.iteritems()``: the latter exists only on
    # Python 2 and raises AttributeError on Python 3.
    for key, value in fragment_dict_mass.items():
        m_1_n = (key[0] - 1, key[1])
        m_1_n_1 = (key[0] - 1, key[1] - 1)
        parent_frag_m, daughter_frag_n = value.frag
        isotope = parent_frag_m.isotracer
        na = helpers.get_isotope_na(isotope, isotope_dict)

        corrected_data = {}
        for sample_name, intensity_m_n in value.data.items():
            intensity_m_1_n = _intensity_or_zero(m_1_n, sample_name)
            intensity_m_1_n_1 = _intensity_or_zero(m_1_n_1, sample_name)
            corrected_data[sample_name] = na_correct_mimosa_algo_array(
                parent_frag_m, daughter_frag_n, intensity_m_n,
                intensity_m_1_n, intensity_m_1_n_1, isotope, na, decimals)

        corrected_dict_mass[key] = Infopacket(
            value.frag, corrected_data, value.unlabeled, value.name)
    return corrected_dict_mass
def na_correction_mimosa(msms_df, isBackground,
                         isotope_dict=const.ISOTOPE_NA_MASS):
    """
    Perform natural-abundance (NA) correction on an LC-MS/MS data frame.

    Three per-row coefficients are derived from the total and labeled atom
    counts of the parent and daughter formulas and the NA value of the
    isotracer. Then, for every metabolite and sample, each transition
    (parent mass m, daughter mass n) is corrected as

        corrected = I(m, n) * A - I(m-1, n) * B - I(m-1, n-1) * C

    where an intensity that is not present for that sample counts as 0.

    Args:
        msms_df: Dataframe which contains intensities to be corrected.
            NOTE: when ``isBackground`` is True this frame is modified in
            place (helper columns are added and the mass/atom-count columns
            are dropped).
        isBackground: boolean - True if background correction was already
            performed on the dataframe, otherwise False.
        isotope_dict: Dictionary of NA values of isotopes.
            Ex: na_dict={'naValue':{'C13':[0.9889,0.0111],
                                    'N':[0.9964,0.0036],
                                    'O':[0.9976,0.0004,0.002],
                                    'H':[0.99985,0.00015],
                                    'S':[0.95,0.0076,0.0424],
                                    }}

    Returns:
        output_df: NA corrected dataframe (empty if there is no metabolite).
    """
    if isBackground:
        final_df = msms_df
        isotracer = msms_df[multiquant.ISOTRACER].unique()
        intensity_col = const.BACKGROUND_WITH_ZERO
    else:
        final_df, isotracer = \
            multiquant_parser.add_mass_and_no_of_atoms_info_frm_label(msms_df)
        intensity_col = const.INTENSITY_COL

    final_df[const.NA_CORRECTED_COL] = 0.0
    # Only the first isotracer found is used for the NA value.
    na = get_isotope_na(isotracer[0], isotope_dict)

    final_df['A'] = (1 + na * (final_df[const.PARENT_NUM_ATOMS] -
                               final_df[const.PARENT_NUM_LABELED_ATOMS]))
    final_df['B'] = na * ((final_df[const.PARENT_NUM_ATOMS] -
                           final_df[const.DAUGHTER_NUM_ATOMS]) -
                          (final_df[const.PARENT_NUM_LABELED_ATOMS] -
                           final_df[const.DAUGHTER_NUM_LABELED_ATOMS] - 1))
    final_df['C'] = na * (final_df[const.DAUGHTER_NUM_ATOMS] -
                          (final_df[const.DAUGHTER_NUM_LABELED_ATOMS] - 1))
    final_df.drop([const.PARENT_MASS_MOL, const.DAUGHTER_MASS_MOL,
                   const.PARENT_NUM_ATOMS, const.DAUGHTER_NUM_ATOMS,
                   const.DAUGHTER_NUM_LABELED_ATOMS,
                   const.PARENT_NUM_LABELED_ATOMS], axis=1, inplace=True)

    corrected_frames = []
    for metab in final_df[const.NAME_COL].unique():
        metabolite_df = \
            final_df[final_df[const.NAME_COL] == metab].reset_index()
        for samp in metabolite_df.Sample.unique():
            # Explicit copy: the frame is written to below and must not be
            # a view-like slice of metabolite_df.
            sample_df = \
                metabolite_df[metabolite_df[multiquant.SAMPLE] == samp].copy()

            # Intensity of every transition of this sample, e.g.
            #   {(191, 111): 2345.75, (192, 111): 5644.847}
            # Built completely BEFORE correcting so that the M-1 lookups do
            # not depend on the order of the rows.
            frag_dict = dict(zip(
                zip(sample_df[const.PARENT_MASS_ISO],
                    sample_df[const.DAUGHTER_MASS_ISO]),
                sample_df[intensity_col]))

            corrected_values = []
            for _, row in sample_df.iterrows():
                m_n = row[const.DAUGHTER_MASS_ISO]
                m_1_n = row[const.PARENT_MASS_ISO] - 1
                m_n_1 = row[const.DAUGHTER_MASS_ISO] - 1
                intensity_m_n = row[intensity_col]
                # Transitions that were not measured contribute nothing.
                intensity_m_1_n = frag_dict.get((m_1_n, m_n), 0)
                intensity_m_1_n_1 = frag_dict.get((m_1_n, m_n_1), 0)
                corrected_values.append(
                    intensity_m_n * row['A'] -
                    intensity_m_1_n * row['B'] -
                    intensity_m_1_n_1 * row['C'])

            # One column assignment replaces the per-row
            # DataFrame.set_value calls (removed in pandas 1.0).
            sample_df[const.NA_CORRECTED_COL] = corrected_values
            corrected_frames.append(sample_df)

    if not corrected_frames:
        # pd.concat rejects an empty list; keep returning an empty frame.
        return pd.DataFrame()
    # Single concatenation instead of growing the frame inside the loop.
    return pd.concat(corrected_frames)
def test_get_isotope_na_keyerror():
    """get_isotope_na must raise KeyError for the isotope label 'PP'."""
    unknown_isotope = 'PP'
    with pytest.raises(KeyError):
        help.get_isotope_na(unknown_isotope)
def test_get_isotope_na():
    """get_isotope_na('C13') must return 0.0111 with default arguments."""
    expected_na = 0.0111
    observed_na = help.get_isotope_na('C13')
    assert observed_na == expected_na