class ImsImportParams(Params):
    """
    Defaults and schema for the ims_import parameters: field/cycle start and
    limit values, an optional destination-to-source channel list, and the
    movie / z-stack flags.
    """

    defaults = Munch(
        is_movie=False,
        start_field=0,
        n_fields_limit=None,
        start_cycle=0,
        n_cycles_limit=None,
        dst_ch_i_to_src_ch_i=None,
        is_z_stack_single_file=False,
        z_stack_n_slices_per_field=None,
    )

    # Note that in movie mode what is called "field" is really the "frame",
    # since the stage does not move between shots. The single .nd2 file in
    # movie mode then treats the "fields" as if they are "cycles" of a
    # single field.

    schema = s(
        s.is_kws_r(
            is_movie=s.is_bool(noneable=True),
            start_field=s.is_int(),
            n_fields_limit=s.is_int(noneable=True),
            start_cycle=s.is_int(noneable=True),
            n_cycles_limit=s.is_int(noneable=True),
            dst_ch_i_to_src_ch_i=s.is_list(elems=s.is_int(), noneable=True),
            is_z_stack_single_file=s.is_bool(),
            z_stack_n_slices_per_field=s.is_int(noneable=True),
        )
    )
def it_validates_default_list_elems_int():
    # A list schema whose elements must all be ints.
    int_list_schema = s(s.is_list(elems=s.is_int()))

    int_list_schema.validate([1, 2, 3])

    # Neither a non-list value nor a list holding a non-int may validate.
    for rejected in (1, [1, "str"]):
        with zest.raises(SchemaValidationFailed):
            int_list_schema.validate(rejected)
def it_shows_help():
    # Build the nested schema piece by piece, innermost fields first.
    field_b = s.is_int(help="Help for b")
    field_c = s.is_kws(d=s.is_int(help="Help for d"))
    field_a = s.is_dict(help="Help for a", elems=dict(b=field_b, c=field_c))
    schema = s(s.is_kws(a=field_a))

    schema.help()

    # Reduce every recorded help call to a one-entry {key: help} dict.
    reported = [
        {call["key"]: call["help"]} for call in m_print_help.normalized_calls()
    ]

    expected_pairs = (
        ("root", None),
        ("a", "Help for a"),
        ("b", "Help for b"),
        ("c", None),
        ("d", "Help for d"),
    )
    assert reported == [{key: text} for key, text in expected_pairs]
def it_validates_allow_empty_string():
    # With allow_empty_string=False the empty string is rejected.
    strict = s(s.is_str(allow_empty_string=False))
    strict.validate("test")
    with zest.raises(SchemaValidationFailed):
        strict.validate("")

    # Without that option both a non-empty and an empty string validate.
    lenient = s(s.is_str())
    for accepted in ("test", ""):
        lenient.validate(accepted)
class LNFitParams(Params):
    """
    Defaults and schema for the lognormal-fit parameters.

    Only photometry_only has a default; the other two keys are declared in
    the schema without one.
    """

    defaults = Munch(photometry_only=False)

    schema = s(
        s.is_kws_r(
            dye_on_threshold=s.is_int(),
            photometry_only=s.is_bool(),
            lognormal_fitter_v2_params=s.is_str(),
        )
    )
def it_returns_required_elems():
    attached = dict(some_key=1)

    # One field per supported kind; "b" carries help and "d" carries userdata.
    fields = dict(
        a=s.is_int(),
        b=s.is_float(help="A float"),
        c=s.is_number(),
        d=s.is_str(userdata=attached),
        e=s.is_list(),
        f=s.is_dict(all_required=True, elems=dict(d=s.is_int(), e=s.is_int())),
    )
    schema = s(s.is_dict(all_required=True, elems=fields))

    # Each requirement is (name, type, help, userdata); the nested dict "f"
    # is reported as a single entry.
    expected = [
        ("a", int, None, None),
        ("b", float, "A float", None),
        ("c", float, None, None),
        ("d", str, None, attached),
        ("e", list, None, None),
        ("f", dict, None, None),
    ]
    assert schema.requirements() == expected
def it_fetches_user_data():
    field_b = s.is_int(help="Help for b", userdata="userdata_1")
    field_c = s.is_kws(d=s.is_int(help="Help for d"))
    schema = s(s.is_dict(help="Help for a", elems=dict(b=field_b, c=field_c)))

    # The first top-level field is "b"; its userdata sits at index 3.
    first_field = schema.top_level_fields()[0]
    assert first_field[0] == "b"
    assert first_field[3] == "userdata_1"
class KNNTrainV1Params(Params):
    """
    Defaults and schema for the KNN v1 training parameters.
    """

    # n_subsample may be None; n_neighbors defaults to 8.
    defaults = Munch(n_subsample=None, n_neighbors=8)

    schema = s(
        s.is_kws_r(
            n_subsample=s.is_int(noneable=True),
            n_neighbors=s.is_int(),
        )
    )
def it_validates_recursively():
    schema = s(
        s.is_dict(
            elems=dict(
                a=s.is_int(),
                b=s.is_list(required=True, elems=s.is_str()),
                c=s.is_dict(required=True),
            )
        )
    )

    # A fully conforming value validates.
    schema.validate({"a": 1, "b": ["a", "b"], "c": {}})

    # A wrong element type inside "b", or a non-dict "c", must be caught.
    broken_values = (
        {"a": 1, "b": [1], "c": {}},
        {"a": 1, "b": ["a"], "c": 1},
    )
    for broken in broken_values:
        with zest.raises(SchemaValidationFailed):
            schema.validate(broken)
class RadFilterParams(Params):
    """
    Defaults and schema for the rad-filter parameters: three float-valued
    thresholds.
    """

    defaults = Munch(
        field_quality_thresh=450.0,
        dark_thresh_in_stds=4.0,
        noi_thresh_in_stds=2.5,
    )

    schema = s(
        s.is_kws_r(
            field_quality_thresh=s.is_float(),
            dark_thresh_in_stds=s.is_float(),
            noi_thresh_in_stds=s.is_float(),
        )
    )
class CalibNNParams(Params):
    """
    Schema for the calib_nn parameters. No defaults are declared.
    """

    defaults = Munch()

    schema = s(
        s.is_kws_r(
            mode=s.is_str(),
            n_pres=s.is_int(),
            n_mocks=s.is_int(),
            n_edmans=s.is_int(),
            dye_names=s.is_list(s.is_str()),
            scope_name=s.is_str(),
            channels=s.is_list(s.is_int()),
        )
    )
class TestGen(BaseGenerator):
    # Generator fixture: a five-key schema with defaults for n_pres and scheme.
    schema = s(
        s.is_kws_r(
            n_edmans=s.is_int(help="Number of Edman cycles"),
            n_pres=s.is_int(),
            protease=s.is_list(noneable=True, elems=s.is_str()),
            label_set=s.is_list(s.is_str()),
            scheme=s.is_list(s.is_str()),
        )
    )

    defaults = Munch(n_pres=0, scheme=[])
class SigprocV2CalibGenerator(BaseGenerator):
    """
    Generate the single run of a Sigproc V2 calibration job from exactly one
    sigproc_source.
    """

    schema = s(s.is_kws_r(**BaseGenerator.sigproc_source_schema.schema()))

    def generate(self):
        """
        Build the calibration run and its report sections.

        Returns:
            A one-element list holding the run (a Munch with run_name plus
            the ims_import task entries).

        Raises:
            ValueError: if self.sigproc_source does not hold exactly one entry.
        """
        runs = []

        if len(self.sigproc_source) != 1:
            raise ValueError("Calibrations can have only one sigproc_source")

        sigproc_source = self.sigproc_source[0]
        ims_import_task = self.ims_imports(sigproc_source)

        run = Munch(run_name="sigproc_v2_calib", **ims_import_task)
        # An explicitly forced run name wins over the fixed default name.
        if self.force_run_name is not None:
            run.run_name = self.force_run_name

        self.report_section_run_object(run)
        template = "sigproc_v2_calib_template.ipynb"
        self.report_section_from_template(template)

        runs += [run]

        n_runs = len(runs)
        self.report_preamble(
            utils.smart_wrap(
                f"""
                # Sigproc V2 Calibration
                ## {n_runs} run(s) processed.
            """
            )
        )

        return runs
def it_bounds_min():
    # Lists need at least two elements.
    schema = s(s.is_list(min_len=2))

    schema.validate([1, 2])

    for too_short in ([1], []):
        with zest.raises(SchemaValidationFailed):
            schema.validate(too_short)
def it_validates_type():
    schema = s(s.is_type(TestType))

    # An instance of the requested type validates...
    schema.validate(TestType())

    # ...while a str or a float does not.
    for wrong_type_value in ("a str", 1.0):
        with zest.raises(SchemaValidationFailed):
            schema.validate(wrong_type_value)
def it_validates_float():
    schema = s(s.is_float())

    schema.validate(1.0)

    # A str is rejected, and so is an int: is_float does not accept 1.
    for not_a_float in ("a str", 1):
        with zest.raises(SchemaValidationFailed):
            schema.validate(not_a_float)
def it_raises_if_bounds_not_valid():
    # A bare int, a pair of strings and an empty tuple are all invalid bounds.
    for bad_bounds in (4, ("a", "b"), ()):
        with zest.raises(SchemaInvalid):
            s._check_bounds_arg(bounds=bad_bounds)
def _before(): nonlocal test_s test_s = s( s.is_dict( all_required=True, elems=dict( a=s.is_int(), b=s.is_int(), c=s.is_dict(all_required=True, elems=dict(d=s.is_int(), e=s.is_int())), ), ))
class RFTrainV2Params(Params):
    """
    Defaults and schema for the random-forest v2 training parameters.
    """

    defaults = Munch(
        n_subsample=None,
        n_estimators=10,
        min_samples_leaf=50,
        max_depth=None,
        max_features="auto",
        max_leaf_nodes=None,
    )

    schema = s(
        s.is_kws_r(
            n_subsample=s.is_int(noneable=True),
            n_estimators=s.is_int(),
            min_samples_leaf=s.is_int(),
            max_depth=s.is_int(noneable=True),
            # Checked only as `object`; the default is the string "auto".
            max_features=s.is_type(object),
            max_leaf_nodes=s.is_int(noneable=True),
        )
    )
def it_ignores_underscored_keys():
    def _two_int_elems():
        # Fresh element specs per schema, so the two schemas share nothing.
        return dict(a=s.is_int(), b=s.is_int())

    # no_extras alone: a wrong type fails, and so does an underscored extra key.
    strict = s(s.is_dict(no_extras=True, elems=_two_int_elems()))
    for rejected in (dict(a=1, b="str"), dict(a=1, b=2, _c=[])):
        with zest.raises(SchemaValidationFailed):
            strict.validate(rejected)

    # With ignore_underscored_keys the underscored extra key is tolerated.
    tolerant = s(
        s.is_dict(
            no_extras=True,
            ignore_underscored_keys=True,
            elems=_two_int_elems(),
        )
    )
    tolerant.validate(dict(a=1, b=2, _c=[]))
class SigprocV2CalibGenerator(BaseGenerator):
    """
    PSF Calib takes a z-stack movie of single dye-count
    (future: multi-channel single dye count).
    """

    schema = s(s.is_kws_r(**BaseGenerator.sigproc_v2_calib_schema.schema()))

    def generate(self):
        """
        Build the single calibration run (ims_import + sigproc_v2_calib tasks)
        and register the "sigproc_calib" static report.

        Returns:
            A one-element list holding the run Munch.

        Raises:
            AssertionError: if self.sigproc_source is not a str, or if
                self.mode is not a key of the local `modes` table.
        """
        runs = []

        assert isinstance(self.sigproc_source, str), (
            f"sigproc_source must be a str, got {type(self.sigproc_source).__name__}"
        )

        ims_import_task = task_templates.ims_import(
            self.sigproc_source, is_movie=(self.mode == "psf" or self.movie)
        )

        # Only one mode is offered at the moment.
        # NOTE(review): "psf" is tested for is_movie above but has no entry
        # in this table, so it would fail the assertion below -- confirm.
        modes = dict(illum=sigproc_v2_common.SIGPROC_V2_ILLUM_CALIB)
        mode = modes.get(self.mode)
        assert mode is not None, (
            f"Unknown calib mode {self.mode!r}; expected one of {sorted(modes)}"
        )

        sigproc_v2_calib_task = task_templates.sigproc_v2_calib(mode=mode)

        run = Munch(
            run_name="sigproc_v2_calib",
            **ims_import_task,
            **sigproc_v2_calib_task,
        )
        # An explicitly forced run name wins over the fixed default name.
        if self.force_run_name is not None:
            run.run_name = self.force_run_name

        runs += [run]
        self.static_reports += ["sigproc_calib"]

        return runs
class KNNV1Params(ParamsAndPriors):
    """
    Defaults and schema for the KNN v1 parameters: a single integer,
    n_neighbors, defaulting to 8.
    """

    defaults = Munch(n_neighbors=8)
    schema = s(s.is_kws_r(n_neighbors=s.is_int()))
class Gen(BaseGenerator):
    # Minimal generator: the schema declares only label_set, a list of str.
    schema = s(
        s.is_kws(
            label_set=s.is_list(s.is_str()),
        )
    )
class ClassifyRFParams(Params):
    """
    Parameter block for classify_rf; it declares no keys and no defaults.
    """

    defaults = Munch()
    schema = s(s.is_kws_r())
class SigprocV2Params(Params):
    """
    Defaults, schema and validation for the sigproc v2 parameters, plus
    helpers that translate between output and input channel indices through
    the radiometry_channels mapping (name -> input channel index).
    """

    defaults = dict(
        radiometry_channels=None,
        n_fields_limit=None,
        save_full_signal_radmat_npy=False,
        # use_cycle_zero_psfs_only=False,
    )

    schema = s(
        s.is_kws_r(
            radiometry_channels=s.is_dict(noneable=True),
            n_fields_limit=s.is_int(noneable=True),
            save_full_signal_radmat_npy=s.is_bool(),
            calibration=s.is_dict(),
            instrument_subject_id=s.is_str(noneable=True),
            # use_cycle_zero_psfs_only=s.is_bool(),
        )
    )

    def validate(self):
        """
        Apply defaults, check the schema, wrap the calibration in a
        Calibration object and check the radiometry channel names and indices.

        Raises:
            ValueError: if filtering by instrument_subject_id leaves the
                calibration with no keys.
        """
        # Note: does not call super because the override_nones is set to false here
        self.schema.apply_defaults(self.defaults, apply_to=self, override_nones=False)
        self.schema.validate(self, context=self.__class__.__name__)

        self.calibration = Calibration(self.calibration)

        subject_id = self.instrument_subject_id
        if subject_id is not None:
            self.calibration.filter_subject_ids(subject_id)
            if len(self.calibration.keys()) == 0:
                raise ValueError(
                    f"All calibration records removed after filter_subject_ids on subject_id '{self.instrument_subject_id}'"
                )

        assert not self.calibration.has_subject_ids()

        if self.radiometry_channels is not None:
            name_pattern = re.compile(r"[0-9a-z_]+")
            for channel_name, channel_index in self.radiometry_channels.items():
                self._validate(
                    name_pattern.fullmatch(channel_name),
                    "radiometry_channels name must be lower-case alphanumeric (including underscore)",
                )
                self._validate(
                    isinstance(channel_index, int), "channel_i must be an integer"
                )

    def set_radiometry_channels_from_input_channels_if_needed(self, n_channels):
        """
        When no radiometry_channels were given, map "ch_<i>" -> i for every
        i in range(n_channels); otherwise leave the mapping untouched.
        """
        if self.radiometry_channels is not None:
            return

        # Assume channels from nd2 manifest
        self.radiometry_channels = {f"ch_{ch}": ch for ch in range(n_channels)}

    @property
    def n_output_channels(self):
        """Number of entries in radiometry_channels."""
        return len(self.radiometry_channels.keys())

    @property
    def n_input_channels(self):
        """Number of entries in radiometry_channels (same count as n_output_channels)."""
        return len(self.radiometry_channels.keys())

    # @property
    # def channels_cycles_dim(self):
    #     # This is a cache set in sigproc_v1.
    #     # It is a helper for the repetitive call:
    #     # n_outchannels, n_inchannels, n_cycles, dim =
    #     return self._outchannels_inchannels_cycles_dim

    def _input_channels(self):
        """
        Return the input channel index of every output channel, where output
        channels are ordered by sorted radiometry_channels name.

        Example:
            radiometry_channels == {"bar": 1, "foo": 0} gives [1, 0]:
            output channel 0 is "bar" (input channel 1) and output channel 1
            is "foo" (input channel 0).
        """
        ordered_names = sorted(self.radiometry_channels.keys())
        return [self.radiometry_channels[name] for name in ordered_names]

    # def input_names(self):
    #     return sorted(self.radiometry_channels.keys())

    def output_channel_to_input_channel(self, out_ch):
        """Return the input channel index mapped to output channel out_ch."""
        input_channels = self._input_channels()
        return input_channels[out_ch]

    def input_channel_to_output_channel(self, in_ch):
        """Not every input channel necessarily has an output; can return None"""
        return utils.filt_first_arg(
            self._input_channels(), lambda candidate: candidate == in_ch
        )
class SigprocV2Generator(BaseGenerator):
    """
    Examine sigprocv2(s) and study their results
    """

    schema = s(
        s.is_kws_r(
            **BaseGenerator.job_setup_schema.schema(),
            **BaseGenerator.lnfit_schema.schema(),
            **BaseGenerator.sigproc_source_schema.schema(),
            **BaseGenerator.sigproc_v2_schema.schema(),
        )
    )

    defaults = Munch(
        lnfit_name=None,
        lnfit_params=None,
        lnfit_dye_on_threshold=None,
        movie=False,
        n_frames_limit=None,
    )

    def generate(self):
        """
        Build one run description per sigproc v2 task, adding the report
        sections for each, and return the list of run descriptions.

        Raises:
            ValueError: if self.sigprocs_v2() yields no tasks.
        """
        calibration = Calibration.from_yaml(self.calibration_file)
        sigproc_tasks = self.sigprocs_v2(
            calibration=calibration,
            instrument_subject_id=self.instrument_subject_id,
        )
        if len(sigproc_tasks) == 0:
            raise ValueError(
                "No sigprocv2 tasks were found. This might be due to an empty block of another switch."
            )

        run_descs = []
        for sigproc_i, sigproc_task in enumerate(sigproc_tasks):
            lnfit_tasks = self.lnfits()

            # The run name embeds the directory name of the first ims_import
            # task found in this sigproc task (empty when there is none).
            import_key = next(
                (key for key in sigproc_task.keys() if "ims_import" in key), None
            )
            if import_key is None:
                sigproc_source = ""
            else:
                src_dir = sigproc_task[import_key].inputs.src_dir
                sigproc_source = local.path(src_dir).name

            if self.force_run_name is not None:
                run_name = self.force_run_name
            else:
                run_name = f"sigproc_v2_{sigproc_i}_{sigproc_source}"

            run_desc = Munch(run_name=run_name, **sigproc_task, **lnfit_tasks)

            sigproc_template = (
                "sigproc_v2_movie_template.ipynb"
                if self.movie
                else "sigproc_v2_template.ipynb"
            )

            self.report_section_markdown(f"# RUN {run_desc.run_name}\n")
            self.report_section_run_object(run_desc)
            self.report_section_from_template(sigproc_template)
            if lnfit_tasks:
                self.report_section_from_template("lnfit_template.ipynb")

            run_descs.append(run_desc)

        n_run_descs = len(run_descs)
        self.report_preamble(
            utils.smart_wrap(
                f"""
                # Signal Processing Overview
                ## {n_run_descs} run(s) processed.
            """
            )
        )

        return run_descs
class PTMGenerator(BaseGenerator):
    """
    Use one set of labels to identify peptides and another label to measure
    quantities of PTM forms.

    Assumptions:
      * Only one label_set channel has PTMs in it.

    Generator-specific arguments:
    @--ptm-peptide="P10000"      # Peptide to examine; Required and Repeatable
    """

    schema = s(
        s.is_kws_r(
            **BaseGenerator.job_setup_schema.schema(),
            **BaseGenerator.protein_schema.schema(),
            **BaseGenerator.label_set_schema.schema(),
            **BaseGenerator.scope_run_schema.schema(),
            **BaseGenerator.peptide_setup_schema.schema(),
            **BaseGenerator.error_model_schema.schema(),
            ptm_protein_of_interest=s.is_list(
                s.is_str(allow_empty_string=False),
                help="The name of the protein to look for PTMs",
            ),
            ptm_label=s.is_str(allow_empty_string=False, help="The PTM label"),
            n_peptides_limit=s.is_int(
                noneable=True, help="Useful for debugging to limit peptide counts"
            ),
        )
    )

    defaults = Munch(
        n_edmans=10,
        n_pres=1,
        n_mocks=0,
        decoys="none",
        random_seed=None,
        ptm_label="S[p]T[p]",
        dye_beta=[7500.0],
        dye_sigma=[0.16],
    )

    def apply_defaults(self):
        """
        Apply the inherited defaults, then replace empty dye_beta / dye_sigma
        lists with their default values.
        """
        super().apply_defaults()

        # Plumbum creates empty lists on list switches, so the regular
        # apply-defaults pass does not quite work for them.
        # TASK: Find a cleaner solution. For now hard-code.
        for list_switch in ("dye_beta", "dye_sigma"):
            if len(getattr(self, list_switch)) == 0:
                setattr(self, list_switch, getattr(self.defaults, list_switch))

    def generate(self):
        """
        Build one run (e-block, prep, sim, train and the two PTM tasks) per
        permutation from run_parameter_permutator(), add the report sections
        and return the list of runs.
        """
        runs = []
        for protease, aa_list, err_set in self.run_parameter_permutator():
            # GENERATE e-block
            e_block = self.erisyon_block(aa_list, protease, err_set)

            ptm_labels = re.findall(
                r"[A-Z]\[.\]", self.ptm_label, flags=re.IGNORECASE
            )

            # This feels a little hacky: the first characters of all PTM
            # labels are joined into one string, which is appended to the
            # label list when not already present.
            ptm_aas = "".join(label[0] for label in ptm_labels)
            if ptm_aas not in aa_list:
                aa_list = (*aa_list, ptm_aas)

            # GENERATE the usual non-ptm prep, sim, train
            prep_task = task_templates.prep(
                self.protein,
                protease,
                self.decoys,
                n_peptides_limit=self.n_peptides_limit,
                proteins_of_interest=self.proteins_of_interest,
            )
            sim_task = task_templates.sim(
                list(aa_list),
                n_pres=self.n_pres,
                n_mocks=self.n_mocks,
                n_edmans=self.n_edmans,
                dye_beta=self.dye_beta,
                dye_sigma=self.dye_sigma,
                ptm_labels=ptm_labels,
            )
            train_task = task_templates.train_rf()

            # GENERATE the ptm tasks
            ptm_train_rf_task = task_templates.ptm_train_rf(
                ptm_labels, self.ptm_protein_of_interest
            )
            ptm_classify_test_rf_task = task_templates.ptm_classify_test_rf()

            # CREATE the run
            runs.append(
                Munch(
                    run_name=self.run_name(aa_list, protease, err_set),
                    **e_block,
                    **prep_task,
                    **sim_task,
                    **train_task,
                    **ptm_train_rf_task,
                    **ptm_classify_test_rf_task,
                )
            )

        self.report_section_run_array(runs, to_load=["plaster", "sim", "prep", "ptm"])
        self.report_section_from_template("ptm_template.ipynb")

        n_runs = len(runs)
        self.report_preamble(
            utils.smart_wrap(
                f"""
                # PTM Report
                ## {n_runs} run(s) processed.
            """
            )
        )

        return runs
class SigprocV1Generator(BaseGenerator):
    """
    Examine sigprocv1(s) and study their results
    """

    schema = s(
        s.is_kws_r(
            **BaseGenerator.job_setup_schema.schema(),
            **BaseGenerator.lnfit_schema.schema(),
            **BaseGenerator.sigproc_source_schema.schema(),
            **BaseGenerator.sigproc_v1_schema.schema(),
        )
    )

    defaults = Munch(
        lnfit_name=None,
        lnfit_params=None,
        lnfit_dye_on_threshold=None,
        lnfit_photometry_only=None,
        movie=False,
        radial_filter=None,
        peak_find_n_cycles=4,
        peak_find_start=0,
        start_cycle=0,
        anomaly_iqr_cutoff=95,
        sample="unknown",
    )

    def generate(self):
        """
        Build one run description per sigproc v1 task, adding the report
        sections for each, and return the list of run descriptions.

        Raises:
            ValueError: if self.sigprocs_v1() yields no tasks.
            AssertionError: if a derived run name is not a valid symbol
                according to utils.is_symbol.
        """
        run_descs = []

        sigproc_tasks = self.sigprocs_v1()
        if len(sigproc_tasks) == 0:
            raise ValueError(
                "No sigprocv1 tasks were found. This might be due to an empty block of another switch."
            )

        # Matches every char that is not allowed in a run name; compiled
        # once because it does not depend on the loop variables.
        symbol_pat = re.compile(r"[^a-z0-9_]")

        for sigproc_i, sigproc_task in enumerate(sigproc_tasks):
            lnfit_tasks = self.lnfits(sigproc_version="v1")

            # The run name embeds the directory name of the first ims_import
            # task found in this sigproc task (empty when there is none).
            sigproc_source = ""
            for k, v in sigproc_task.items():
                if "ims_import" in k:
                    sigproc_source = local.path(v.inputs.src_dir).name
                    break

            # Replace invalid chars with underscores
            sigproc_source = symbol_pat.sub("_", sigproc_source.lower())

            run_name = f"sigproc_v1_{sigproc_i}_{sigproc_source}"
            assert utils.is_symbol(run_name)

            # The forced name is applied after the check above, so it is not
            # itself validated here.
            if self.force_run_name is not None:
                run_name = self.force_run_name

            run_desc = Munch(run_name=run_name, **sigproc_task, **lnfit_tasks)

            sigproc_template = "sigproc_v1_template.ipynb"
            if self.movie:
                sigproc_template = "sigproc_v1_movie_template.ipynb"

            self.report_section_markdown(f"# RUN {run_desc.run_name}\n")
            self.report_section_run_object(run_desc)

            self.report_section_from_template(sigproc_template)
            if lnfit_tasks:
                self.report_section_from_template("lnfit_template.ipynb")

            run_descs += [run_desc]

        n_run_descs = len(run_descs)
        self.report_preamble(
            utils.smart_wrap(
                f"""
                # Signal Processing Overview
                ## {n_run_descs} run(s) processed.
            """,
                width=None,
            )
        )

        return run_descs
class RFV2Params(Params):
    """
    Parameter block for rf_v2; it declares no keys and no defaults.
    """

    defaults = Munch()
    schema = s(s.is_kws_r())
class CalibNNV1Generator(SigprocV1Generator):
    """
    Import calibration runs
    """

    schema = s(
        s.is_kws_r(
            **SigprocV1Generator.schema.schema(),
            **BaseGenerator.scope_run_schema.schema(),
            mode=s.is_str(
                help=f"Current modes are: [{', '.join(modes)}]",
                userdata=dict(cli=True),
            ),
            channel=s.is_list(s.is_int(), help="Channel list to include"),
            dye_names=s.is_str(
                help="Dye names of each channel; will be saved with this scope.",
                userdata=dict(cli=True),
            ),
            scope_name=s.is_str(
                help="Scope name, will be saved with this scope.",
                userdata=dict(cli=True),
            ),
        )
    )

    def generate(self):
        """
        Build the single calibration run (first sigproc v1 task plus the
        calib_nn_v1 task) and its report sections.

        Returns:
            A one-element list holding the run Munch.

        Raises:
            ValueError: if self.sigproc_source does not hold exactly one
                entry, or if self.mode is not in the module-level `modes`.
        """
        runs = []

        # The tasks are built before the checks below, as in the original
        # statement order.
        sigproc_tasks = self.sigprocs_v1()
        if len(self.sigproc_source) != 1:
            raise ValueError("Calibrations can have only one sigproc_source")

        if self.mode not in modes:
            raise ValueError(f"Unknown calib mode {self.mode}")

        sigproc_task = sigproc_tasks[0]
        calib_task = task_templates.calib_nn_v1(
            mode=self.mode,
            n_pres=self.n_pres,
            n_mocks=self.n_mocks,
            n_edmans=self.n_edmans,
            dye_names=self.dye_names,
            scope_name=self.scope_name,
            channels=self.channel,
        )

        run = Munch(
            run_name=f"calib_{self.mode}",
            **sigproc_task,
            **calib_task,
        )

        self.report_section_run_object(run)
        calib_template = "calib_nn_template.ipynb"
        self.report_section_from_template(calib_template)

        runs += [run]

        n_runs = len(runs)
        self.report_preamble(
            utils.smart_wrap(
                f"""
                # Calib Overview
                ## {n_runs} run(s) processed.
            """,
                width=None,
            )
        )

        return runs