class PrepParams(Params):
    """Parameter set holding protease/decoy/limit options and a protein list.

    ``defaults`` lists the fallback value for each optional key; ``schema``
    declares the accepted type of every key. ``proteins`` has no default.
    """

    # Fallback values; every key here also appears in ``schema`` below.
    defaults = Munch(
        protease=None,
        decoy_mode=None,
        include_misses=0,
        n_peps_limit=None,
        drop_duplicates=False,
        n_ptms_limit=None,
    )

    schema = s(
        s.is_kws_r(
            protease=s.is_list(noneable=True, elems=s.is_str()),
            decoy_mode=s.is_str(noneable=True),
            include_misses=s.is_int(),
            n_peps_limit=s.is_int(noneable=True),
            drop_duplicates=s.is_bool(),
            n_ptms_limit=s.is_int(noneable=True),
            # Each protein record: name and sequence are required, the
            # remaining fields may be None.
            proteins=s.is_list(
                s.is_kws(
                    name=s.is_str(required=True),
                    sequence=s.is_str(required=True),
                    ptm_locs=s.is_str(noneable=True),
                    report=s.is_int(noneable=True),
                    abundance=s.is_number(noneable=True),
                )
            ),
        )
    )
def it_returns_required_elems():
    """requirements() yields one (name, type, help, userdata) tuple per top-level elem."""
    extra = dict(some_key=1)

    elems = dict(
        a=s.is_int(),
        b=s.is_float(help="A float"),
        c=s.is_number(),
        d=s.is_str(userdata=extra),
        e=s.is_list(),
        # Nested dict: only "f" itself is expected in the result, not its d/e.
        f=s.is_dict(all_required=True, elems=dict(d=s.is_int(), e=s.is_int())),
    )
    schema = s(s.is_dict(all_required=True, elems=elems))

    expected = [
        ("a", int, None, None),
        ("b", float, "A float", None),
        ("c", float, None, None),  # is_number is reported as float
        ("d", str, None, extra),
        ("e", list, None, None),
        ("f", dict, None, None),
    ]

    found = schema.requirements()
    assert found == expected
class SigprocV1Params(Params):
    """Parameters checked against ``schema``; ``validate`` fills in ``defaults``.

    ``radiometry_channels`` (when set) maps a channel name to an input channel
    index; the channel helper methods below are all derived from that mapping.
    """

    defaults = dict(
        hat_rad=2,
        iqr_rng=96,
        threshold_abs=1.0,
        channel_indices_for_alignment=None,
        channel_indices_for_peak_finding=None,
        radiometry_channels=None,
        save_debug=False,
        peak_find_n_cycles=4,
        peak_find_start=0,
        radial_filter=None,
        anomaly_iqr_cutoff=95,
        n_fields_limit=None,
        save_full_signal_radmat_npy=False,
    )

    schema = s(
        s.is_kws_r(
            anomaly_iqr_cutoff=s.is_number(noneable=True, bounds=(0, 100)),
            radial_filter=s.is_float(noneable=True, bounds=(0, 1)),
            peak_find_n_cycles=s.is_int(bounds=(1, None), noneable=True),
            peak_find_start=s.is_int(bounds=(0, None), noneable=True),
            save_debug=s.is_bool(),
            hat_rad=s.is_int(bounds=(1, 3)),
            iqr_rng=s.is_number(noneable=True, bounds=(0, 100)),
            # Not sure of a reasonable bound
            threshold_abs=s.is_number(bounds=(0, 100)),
            channel_indices_for_alignment=s.is_list(s.is_int(), noneable=True),
            channel_indices_for_peak_finding=s.is_list(s.is_int(), noneable=True),
            radiometry_channels=s.is_dict(noneable=True),
            n_fields_limit=s.is_int(noneable=True),
            save_full_signal_radmat_npy=s.is_bool(),
        )
    )

    def validate(self):
        """Apply defaults, run the schema, then check radiometry_channels entries."""
        # Note: super().validate() is intentionally not called because
        # override_nones must be False here.
        self.schema.apply_defaults(self.defaults, apply_to=self, override_nones=False)
        self.schema.validate(self, context=self.__class__.__name__)

        if self.radiometry_channels is None:
            return

        name_pat = re.compile(r"[0-9a-z_]+")
        for ch_name, ch_index in self.radiometry_channels.items():
            self._validate(
                name_pat.fullmatch(ch_name),
                "radiometry_channels name must be lower-case alphanumeric (including underscore)",
            )
            self._validate(isinstance(ch_index, int), "channel_i must be an integer")

    def set_radiometry_channels_from_input_channels_if_needed(self, n_channels):
        """Fill radiometry_channels with ``ch_<i> -> i`` entries when it is unset."""
        if self.radiometry_channels is not None:
            return

        # Assume channels from nd2 manifest
        self.radiometry_channels = {
            f"ch_{ch_i}": ch_i for ch_i in list(range(n_channels))
        }

    @property
    def n_output_channels(self):
        """Number of entries in radiometry_channels."""
        names = self.radiometry_channels.keys()
        return len(names)

    @property
    def n_input_channels(self):
        """Number of entries in radiometry_channels (same count as n_output_channels)."""
        names = self.radiometry_channels.keys()
        return len(names)

    @property
    def channels_cycles_dim(self):
        # This is a cache set in sigproc_v1.
        # It is a helper for the repetitive call:
        # n_outchannels, n_inchannels, n_cycles, dim =
        return self._outchannels_inchannels_cycles_dim

    def _input_channels(self):
        """
        Return the input channel indices held in radiometry_channels, ordered
        by sorted channel name. Position i of the result is the input channel
        used for output channel i.

        Example:
            radiometry_channels == {"foo": 0, "bar": 1}
            Sorted names are ["bar", "foo"], so this returns [1, 0]:
            the 0th output channel is mapped to input channel 1.
        """
        ordered_names = sorted(self.radiometry_channels.keys())
        return [self.radiometry_channels[ch_name] for ch_name in ordered_names]

    # def input_names(self):
    #     return sorted(self.radiometry_channels.keys())

    def output_channel_to_input_channel(self, out_ch):
        """Look up the input channel at position ``out_ch`` of _input_channels()."""
        in_channels = self._input_channels()
        return in_channels[out_ch]

    def input_channel_to_output_channel(self, in_ch):
        """Not every input channel necessarily has an output; can return None"""

        def _is_requested(candidate):
            return candidate == in_ch

        return utils.filt_first_arg(self._input_channels(), _is_requested)
def it_validates_number():
    """is_number accepts floats and ints and rejects strings."""
    number_schema = s(s.is_number())

    # Both a float and an int must pass without raising.
    for accepted in (1.0, 1):
        number_schema.validate(accepted)

    with zest.raises(SchemaValidationFailed):
        number_schema.validate("a str")
class PrepParams(Params):
    """Parameter set holding protease/decoy/limit options, photobleaching
    options and a protein list.

    ``defaults`` lists the fallback value for each optional key; ``schema``
    declares the accepted type of every key. ``validate`` additionally checks
    (and optionally normalizes) the per-protein ``abundance`` values.
    """

    PHOTOBLEACHING_PSEUDO_AA = "X"

    # When True, None/NaN abundances are tolerated by validate().
    ALLOW_NONES_AND_NANS_IN_ABUNDANCE = False

    NORMALIZE_ABUNDANCE = False  # Abundance is normalized in gen

    defaults = Munch(
        protease=None,
        decoy_mode=None,
        include_misses=0,
        n_peps_limit=None,
        drop_duplicates=False,
        n_ptms_limit=None,
        is_photobleaching_run=False,
        photobleaching_n_cycles=None,
        photobleaching_run_n_dye_count=None,
    )

    schema = s(
        s.is_kws_r(
            protease=s.is_list(noneable=True, elems=s.is_str()),
            decoy_mode=s.is_str(noneable=True),
            include_misses=s.is_int(),
            n_peps_limit=s.is_int(noneable=True),
            drop_duplicates=s.is_bool(),
            n_ptms_limit=s.is_int(noneable=True),
            proteins=s.is_list(
                s.is_kws(
                    name=s.is_str(required=True),
                    sequence=s.is_str(required=True),
                    ptm_locs=s.is_str(noneable=True),
                    is_poi=s.is_int(noneable=True),
                    abundance=s.is_number(noneable=True),
                )
            ),
            is_photobleaching_run=s.is_bool(),
            photobleaching_n_cycles=s.is_int(noneable=True),
            photobleaching_run_n_dye_count=s.is_int(noneable=True),
        )
    )

    def validate(self):
        """Run the base validation, then validate/normalize protein abundances.

        If no protein carries a usable abundance (all missing, None or NaN),
        every protein's abundance is set to 1. Otherwise each protein must
        have an ``abundance`` key whose value is >= 0, and -- unless
        ``ALLOW_NONES_AND_NANS_IN_ABUNDANCE`` is set -- is neither None nor
        NaN. When ``NORMALIZE_ABUNDANCE`` is set, abundances are divided by
        the smallest positive abundance.

        Raises:
            SchemaValidationFailed: if a protein fails an abundance criterion.
        """
        super().validate()

        def is_blank(value):
            # None and NaN both mean "no abundance supplied".
            return value is None or math.isnan(value)

        # Try to normalize abundance values if provided. If abundance values
        # are provided, do basic validation.
        # When a protein csv with no abundance columns is provided, it will
        # come through as all nans.
        # Note that self.proteins is likely a list of Munches, but could be a
        # list of dicts, so don't assume we can access items as attrs.
        abundance_info_present = any(
            "abundance" in protein and not is_blank(protein["abundance"])
            for protein in self.proteins
        )

        if not abundance_info_present:
            # Abundance information is missing from all proteins.
            # Set abundance to 1
            for protein in self.proteins:
                protein["abundance"] = 1
            return

        abundance_criteria = [
            (lambda protein: "abundance" in protein, "Abundance missing"),
            (
                # None/NaN are not judged here: NaN >= 0 is always False, which
                # would reject NaN even when it is allowed, and would hide the
                # dedicated "must not be NaN" message below.
                lambda protein: is_blank(protein["abundance"])
                or protein["abundance"] >= 0,
                "Abundance must be greater than or equal to zero",
            ),
        ]
        if not self.ALLOW_NONES_AND_NANS_IN_ABUNDANCE:
            abundance_criteria += [
                (
                    lambda protein: protein["abundance"] is not None,
                    "Abundance must not be None",
                ),
                (
                    lambda protein: not math.isnan(protein["abundance"]),
                    "Abundance must not be NaN",
                ),
            ]

        # Check every protein against the criteria and track the smallest
        # strictly-positive abundance.
        min_abundance = None
        for protein in self.proteins:
            for criteria_fn, msg in abundance_criteria:
                if not criteria_fn(protein):
                    abundance_value = protein.get("abundance")
                    raise SchemaValidationFailed(
                        f"Protein {protein.get('name')} has invalid abundance: {abundance_value} - {msg}"
                    )

            abundance = protein["abundance"]
            # Skip blanks (comparing None with a number raises TypeError) and
            # zeros (a zero minimum cannot be used as a divisor).
            if is_blank(abundance) or abundance <= 0:
                continue
            if min_abundance is None or abundance < min_abundance:
                min_abundance = abundance

        # min_abundance stays None when every supplied abundance is zero; there
        # is nothing to normalize by in that case.
        if (
            self.NORMALIZE_ABUNDANCE
            and min_abundance is not None
            and min_abundance != 1
        ):
            log.info("abundance data is not normalized, normalizing.")
            # normalize abundance by min value
            for protein in self.proteins:
                if protein["abundance"] is not None:
                    protein["abundance"] /= min_abundance