def peak_fits(self):
    """
    Return the "peak_fit_df" property gathered via
    _load_df_prop_from_fields, renumbered with a pan-field peak_i.

    The frame is validated against self.peak_fit_df_schema before
    it is renumbered.
    """
    fit_df = self._load_df_prop_from_fields("peak_fit_df")
    check.df_t(fit_df, self.peak_fit_df_schema)

    # Rows only carry a local frame_peak_i at this point; the pan-field
    # peak_i is assigned here from a fresh 0..n-1 index.
    fit_df = fit_df.reset_index(drop=True)
    fit_df.peak_i = fit_df.index
    return fit_df
def it_radmats():
    """Check the schema, row count and a couple of signal values of radmats()."""
    radmat_df = res.radmats()
    check.df_t(radmat_df, SigprocV1Result.radmat_df_schema)
    assert len(radmat_df) == 4 * 2 * 3

    def _first_signal(peak_i, channel_i, cycle_i):
        # First signal value among rows matching the (peak, channel, cycle) triple.
        mask = (
            (radmat_df.peak_i == peak_i)
            & (radmat_df.channel_i == channel_i)
            & (radmat_df.cycle_i == cycle_i)
        )
        return radmat_df[mask].signal.values[0]

    # Sanity check a few
    assert _first_signal(1, 1, 1) == 1.0
    assert _first_signal(2, 0, 0) == 5.0
def peaks(self, fields=None, n_peaks_subsample=None):
    """
    Return the "peak_df" property for the requested fields, joined with
    "peak_fit_df" when that property exists, and numbered with a
    pan-field peak_i.

    Arguments:
        fields: Field selector forwarded to self._fields_to_field_iz.
        n_peaks_subsample: If not None, the number of rows to draw
            (with replacement) from the result.

    Returns:
        A DataFrame with peak_i set to the row position. When
        n_peaks_subsample is given the rows are a random sample with
        replacement, so peak_i values may repeat.
    """
    field_iz = self._fields_to_field_iz(fields)

    df = self._load_df_prop_from_fields("peak_df", field_iz=field_iz)
    check.df_t(df, self.peak_df_schema)

    if self._has_prop("peak_fit_df"):
        fit_df = self._load_df_prop_from_fields("peak_fit_df", field_iz=field_iz)
        # Validate the frame that was just loaded. (This previously
        # re-checked the peak frame against the fit schema, leaving
        # fit_df unvalidated.)
        check.df_t(fit_df, self.peak_fit_df_schema)

        join_keys = ["field_i", "field_peak_i"]
        df = df.set_index(join_keys).join(fit_df.set_index(join_keys))
        # NOTE(review): the reset_index(drop=True) below discards this
        # (field_i, field_peak_i) index, so those columns appear to be
        # absent from the result on this path -- confirm that is intended.

    # The peaks have a local frame_peak_i but they
    # don't have their pan-field peak_i set yet.
    df = df.reset_index(drop=True)
    df.peak_i = df.index

    if n_peaks_subsample is not None:
        df = df.sample(n_peaks_subsample, replace=True)

    return df
def it_mask_rects():
    """Check the schema and row count of mask_rects()."""
    n_expected_rows = 2 * 2 * 3
    mask_df = res.mask_rects()
    check.df_t(mask_df, SigprocV1Result.mask_rects_df_schema)
    assert len(mask_df) == n_expected_rows
def _do_ptm_permutations(df, n_ptms_limit):
    """
    Build one modified copy of a peptide for every non-empty subset of
    the PTM locations that fall inside it.

    Arguments:
        df: A dataframe with a single pep_i, and pro_ptm_locs, and
            pep_offset_in_pro and aa for each location in the peptide.
            pro_ptm_locs is a ";"-separated string of 1-based protein
            positions (or an empty/falsy value when there are none).
        n_ptms_limit: Maximum number of in-range PTM locations to
            permute. None means no limit.

    Returns:
        A list of dataframes, one per subset, each a copy of df with
        pep_i set to NaN and "[p]" appended to the aa at every location
        in that subset. Returns [] when there are no PTM locations or
        when the limit is exceeded.
    """
    check.df_t(
        df,
        dict(pep_i=int, pro_ptm_locs=object, pep_offset_in_pro=int, aa=object),
        allow_extra_columns=True,
    )

    # pro_ptm_locs is identical for all rows
    pro_ptm_locs = df.pro_ptm_locs.values[0]
    if not pro_ptm_locs:
        return []

    # get 0-based indices from string representation; these are for the
    # entire protein.
    ptm_locs_zero_based = [(int(x) - 1) for x in pro_ptm_locs.split(";")]

    # get the ptms that coincide with the range spanned by this peptide.
    min_pos = df.pep_offset_in_pro.min()
    max_pos = df.pep_offset_in_pro.max()
    ptm_locs_zero_based = [x for x in ptm_locs_zero_based if min_pos <= x <= max_pos]

    # The None check must come first: comparing an int against None
    # raises TypeError, which made "no limit" unusable.
    n_ptms = len(ptm_locs_zero_based)
    if n_ptms_limit is not None and n_ptms > n_ptms_limit:
        _info(f"Skipping ptm for peptide {df.pep_i.iloc[0]} with {n_ptms} PTMs")
        return []

    powerset = [
        np.array(x)
        for length in range(1, n_ptms + 1)
        for x in itertools.combinations(ptm_locs_zero_based, length)
    ]

    # powerset is a list of arrays. So if have [2,4,10] in ptms you get
    # powerset = [ (2), (4), (10), (2,4), (2,10), (4,10), (2,4,10) ]
    #
    # The goal is to make a new peptide+seq for each of those subsets by
    # adding the modification '[p]' to the aa at that seq index location
    mod = "[p]"
    new_pep_seqs = []
    for ptm_locs in powerset:
        new_pep_seq = df.copy()
        new_pep_seq.pep_i = np.nan
        new_pep_seq = new_pep_seq.set_index("pep_offset_in_pro")
        # .loc, not .at: .at is a scalar accessor and ptm_locs is an
        # array of labels.
        new_pep_seq.loc[ptm_locs, "aa"] = new_pep_seq.loc[ptm_locs, "aa"] + mod
        new_pep_seq = new_pep_seq.reset_index()
        new_pep_seqs.append(new_pep_seq)

    return new_pep_seqs
def mask_rects(self):
    """
    Return the "mask_rects_df" property gathered via
    _load_df_prop_from_fields, validated against
    self.mask_rects_df_schema.
    """
    rects_df = self._load_df_prop_from_fields("mask_rects_df")
    check.df_t(rects_df, self.mask_rects_df_schema)
    return rects_df
def fields(self, fields=None):
    """
    Return the "field_df" property for the requested fields.

    The fields argument is translated by self._fields_to_field_iz and
    the resulting frame is validated against self.field_df_schema
    (extra columns are permitted).
    """
    selected_field_iz = self._fields_to_field_iz(fields)
    field_df = self._load_df_prop_from_fields("field_df", field_iz=selected_field_iz)
    check.df_t(field_df, self.field_df_schema, allow_extra_columns=True)
    return field_df
def fields(self):
    """
    Return the "field_df" property gathered via
    _load_df_prop_from_all_fields, validated against
    self.field_df_schema (extra columns are permitted).
    """
    field_df = self._load_df_prop_from_all_fields("field_df")
    check.df_t(field_df, self.field_df_schema, allow_extra_columns=True)
    return field_df