def test_indent_forest(self):
    """ Check that `indent_forest` indents every line by its nesting depth in the forest """
    forest = [
        '0,1',
        ['1,1',
         '1,2', ],
        '0,2',
        ['1,2',
         ['2,1',
          '2,2', ],
         '1,3', ]
    ]
    # The expected text is assembled line by line with computed padding, so the
    # leading whitespace that the assertion depends on is explicit in the code
    # rather than hidden inside a multi-line literal.
    pad = ' ' * 2
    indent_by_2 = '\n'.join([
        '0,1',
        pad + '1,1',
        pad + '1,2',
        '0,2',
        pad + '1,2',
        2 * pad + '2,1',
        2 * pad + '2,2',
        pad + '1,3',
    ])
    self.assertEqual(string.indent_forest(forest, indentation=2), indent_by_2)

    # a multi-line string nested one level deep: each of its lines gets the
    # padding for depth 1, in front of any whitespace the line already has
    forest2 = [
        '0,1',
        ["e e cummings\ncould write\n but couldn't code"],
        '0,2',
    ]
    pad = ' ' * 3
    indent_with_text = '\n'.join([
        '0,1',
        pad + 'e e cummings',
        pad + 'could write',
        pad + " but couldn't code",
        '0,2',
    ])
    self.assertEqual(string.indent_forest(forest2, indentation=3), indent_with_text)
def test_indent_forest_with_trailing_blanks(self):
    """ Check `indent_forest` with and without `keep_trailing_blank_lines` """
    first = 'test_text1\ntest_text2\n\ntest_text4\n \n'
    second = 'test_text5\ntest_text6'
    forest = [first, second]

    # trailing blank lines of each item are kept when explicitly requested
    kept = string.indent_forest(forest, keep_trailing_blank_lines=True, indentation=0)
    self.assertEqual(kept, '\n'.join([first, second]))

    # by default, trailing whitespace of each item is dropped
    stripped = string.indent_forest(forest, indentation=0)
    self.assertEqual(stripped, '\n'.join([first.rstrip(), second]))
def post(self):
    """ Validate that a workbook is consistent with a schema, and report any errors

    The uploaded schema and workbook are saved to temporary directories, the
    workbook is read with the models of the schema, and the decoded objects
    are validated.

    Returns:
        :obj:`str`: `'The dataset is valid'`, or a message that lists the
            validation errors
    """
    args = validate_parser.parse_args()

    schema_dir, schema_filename = save_schema(args['schema'])
    try:
        # saving the workbook happens inside the outer `try` so that the
        # schema directory is removed even if the workbook cannot be saved
        wb_dir, wb_filename = save_in_workbook(args['workbook'])
        try:
            schema_name, schema, models = get_schema_models(schema_filename)
            objs = io.Reader().run(wb_filename,
                                   schema_name=schema_name,
                                   models=models,
                                   group_objects_by_model=False,
                                   validate=False,
                                   **DEFAULT_READER_ARGS)
        except Exception as err:
            # report any failure to load the schema or read the workbook
            # through a 400 response that carries the error text
            flask_restplus.abort(400, str(err))
        finally:
            shutil.rmtree(wb_dir)
    finally:
        # separate `finally` clauses: a failure to remove one directory
        # does not prevent the attempt to remove the other
        shutil.rmtree(schema_dir)

    errors = core.Validator().validate(objs)
    if errors:
        msg = indent_forest(['The dataset is invalid:', [errors]])
    else:
        msg = 'The dataset is valid'

    return msg
def _prepare(self):
    """ Transform `self.model` with `PrepForWcSimTransform` and validate it

    Raises:
        :obj:`MultialgorithmError`: if `Validator` reports errors for the model
    """
    # transform first, so that validation sees the prepared model
    PrepForWcSimTransform().run(self.model)

    validation_errors = Validator().run(self.model)
    if validation_errors:
        report = indent_forest(['The model is invalid:', [validation_errors]])
        raise MultialgorithmError(report)
def test_indent_forest_with_return_list(self):
    """ Check that `return_list=True` returns the indented lines as a list of strings """
    indentation = 3
    forest = [
        '0,1',
        ["e e cummings\ncould write\n but couldn't code"],
        '0,2',
    ]
    # Lines of the nested string are at depth 1. Their padding is derived from
    # `indentation` so that the expected values always agree with the
    # argument passed to `indent_forest` below.
    pad = ' ' * indentation
    result = [
        '0,1',
        pad + 'e e cummings',
        pad + 'could write',
        pad + " but couldn't code",
        '0,2',
    ]
    self.assertEqual(
        string.indent_forest(forest, indentation=indentation, return_list=True),
        result)
def run(self, path, models=None, ignore_missing_models=None, ignore_sheet_order=None,
        include_all_attributes=False, ignore_missing_attributes=None,
        ignore_extra_attributes=None, ignore_attribute_order=None,
        group_objects_by_model=True, validate=None):
    """ Read a list of model objects from file(s) and, optionally, validate them

    Args:
        path (:obj:`str`): path to file(s)
        models (:obj:`types.TypeType` or :obj:`list` of :obj:`types.TypeType`, optional): type
            of object to read or list of types of objects to read; defaults to `self.MODELS`
        ignore_missing_models (:obj:`bool`, optional): if :obj:`False`, report an error if a
            worksheet/file is missing for one or more models
        ignore_sheet_order (:obj:`bool`, optional): if :obj:`True`, do not require the sheets
            to be provided in the canonical order
        include_all_attributes (:obj:`bool`, optional): if :obj:`True`, export all attributes
            including those not explicitly included in `Model.Meta.attribute_order`
        ignore_missing_attributes (:obj:`bool`, optional): if :obj:`False`, report an error if
            a worksheet/file doesn't contain all of attributes in a model in `models`
        ignore_extra_attributes (:obj:`bool`, optional): if :obj:`True`, do not report errors
            if attributes in the data are not in the model
        ignore_attribute_order (:obj:`bool`): if :obj:`True`, do not require the attributes to
            be provided in the canonical order
        group_objects_by_model (:obj:`bool`, optional): if :obj:`True`, group decoded objects
            by their types
        validate (:obj:`bool`, optional): if :obj:`True`, validate the data; if :obj:`None`,
            the `validate` setting of the `wc_lang` I/O configuration decides

    Each `ignore_*` argument that is left as :obj:`None` defaults to the negation of the
    `strict` setting of the `wc_lang` I/O configuration.

    Returns:
        :obj:`dict` or :obj:`list`: model objects grouped by `obj_tables.Model` class if
            `group_objects_by_model` is :obj:`True`, otherwise a flat list of the objects

    Raises:
        :obj:`ValueError`: if the file defines zero or multiple models or the model
            defined in the file(s) is invalid
    """
    is_workbook = issubclass(self.get_reader(path), obj_tables.io.WorkbookReader)
    if is_workbook:
        Writer.validate_implicit_relationships()

    if models is None:
        models = self.MODELS

    config = wc_lang.config.core.get_config()['wc_lang']['io']
    lenient = not config['strict']
    if ignore_missing_models is None:
        ignore_missing_models = lenient
    if ignore_sheet_order is None:
        ignore_sheet_order = lenient
    if ignore_missing_attributes is None:
        ignore_missing_attributes = lenient
    if ignore_extra_attributes is None:
        ignore_extra_attributes = lenient
    if ignore_attribute_order is None:
        ignore_attribute_order = lenient

    # Always read the objects grouped by model: the checks below index the result
    # by model class and iterate over its items. Flattening, if requested by the
    # caller, is done at the end.
    objects = super(Reader, self).run(
        path,
        models=models,
        ignore_missing_models=ignore_missing_models,
        ignore_sheet_order=ignore_sheet_order,
        include_all_attributes=include_all_attributes,
        ignore_missing_attributes=ignore_missing_attributes,
        ignore_extra_attributes=ignore_extra_attributes,
        ignore_attribute_order=ignore_attribute_order,
        group_objects_by_model=True,
        validate=False)

    # check that file only has 1 model
    if len(objects[core.Model]) != 1:
        raise ValueError('"{}" should define one model'.format(path))
    model = objects[core.Model][0]

    # add implicit relationships to `Model`
    if is_workbook:
        for cls, cls_objects in objects.items():
            for attr in cls.Meta.attributes.values():
                if isinstance(attr, obj_tables.RelatedAttribute) and \
                        attr.related_class == core.Model:
                    for cls_obj in cls_objects:
                        setattr(cls_obj, attr.name, model)

    flat_objects = [obj for cls_objs in objects.values() for obj in cls_objs]

    # validate: an explicit `validate` argument wins; `None` defers to the configuration
    if validate or (validate is None and config['validate']):
        errors = obj_tables.Validator().validate(flat_objects)
        if errors:
            raise ValueError(
                indent_forest([
                    'The model cannot be loaded because it fails to validate:',
                    [errors]
                ]))

    if not group_objects_by_model:
        return flat_objects
    return objects
def test_indent_forest_with_string(self):
    """ Check that a bare string given as the forest is returned unchanged """
    text = 'forest'
    indented = string.indent_forest(text, indentation=3)
    self.assertEqual(indented, 'forest')
def run(self, core_path, seq_path='', rewrite_seq_path=True, taxon='prokaryote',
        models=None, ignore_missing_models=None, ignore_extra_models=None,
        ignore_sheet_order=None, include_all_attributes=False,
        ignore_missing_attributes=None, ignore_extra_attributes=None,
        ignore_attribute_order=None, group_objects_by_model=True, validate=True,
        read_metadata=False):
    """ Read knowledge base from file(s)

    Args:
        core_path (:obj:`str`): path to core knowledge base
        seq_path (:obj:`str`): path to genome sequence
        rewrite_seq_path (:obj:`bool`, optional): if :obj:`True`, the path to genome sequence
            in the knowledge base will be updated to the provided seq_path
        taxon (:obj:`str`, optional): type of model order to use; `'prokaryote'` or
            `'eukaryote'`
        models (:obj:`types.TypeType` or :obj:`list` of :obj:`types.TypeType`, optional): type
            of object to read or list of types of objects to read
        ignore_missing_models (:obj:`bool`, optional): if :obj:`False`, report an error if a
            worksheet/file is missing for one or more models
        ignore_extra_models (:obj:`bool`, optional): if :obj:`True` and all `models` are found,
            ignore other worksheets or files
        ignore_sheet_order (:obj:`bool`, optional): if :obj:`True`, do not require the sheets
            to be provided in the canonical order
        include_all_attributes (:obj:`bool`, optional): if :obj:`True`, export all attributes
            including those not explicitly included in `Model.Meta.attribute_order`
        ignore_missing_attributes (:obj:`bool`, optional): if :obj:`False`, report an error if
            a worksheet/file doesn't contain all of attributes in a model in `models`
        ignore_extra_attributes (:obj:`bool`, optional): if :obj:`True`, do not report errors
            if attributes in the data are not in the model
        ignore_attribute_order (:obj:`bool`): if :obj:`True`, do not require the attributes to
            be provided in the canonical order
        group_objects_by_model (:obj:`bool`, optional): if :obj:`True`, group decoded objects
            by their types
        validate (:obj:`bool`, optional): if :obj:`True`, validate the data; if :obj:`None`,
            the `validate` setting of the `wc_kb` I/O configuration decides
        read_metadata (:obj:`bool`, optional): if :obj:`True`, read metadata models

    Each `ignore_*` argument that is still :obj:`None` defaults to the negation of the
    `strict` setting of the `wc_kb` I/O configuration.

    Returns:
        :obj:`dict` or :obj:`list`: model objects grouped by `obj_tables.Model` class if
            `group_objects_by_model` is :obj:`True`, otherwise a flat list of the objects

    Raises:
        :obj:`ValueError`: if `taxon` is not supported, if :obj:`core_path` does not define
            exactly one knowledge base, if it defines more than one cell, or if the
            knowledge base fails to validate
    """
    if issubclass(self.get_reader(core_path), obj_tables.io.WorkbookReader):
        Writer.validate_implicit_relationships()

    # NOTE(review): the `models` argument is overwritten here, so a value passed by
    # the caller has no effect -- confirm whether this is intended
    if taxon == 'prokaryote':
        models = PROKARYOTE_MODELS
    elif taxon == 'eukaryote':
        models = EUKARYOTE_MODELS
    else:
        raise ValueError('Unsupported taxon "{}"'.format(taxon))

    if read_metadata:
        models = list(models) + [
            obj_tables.utils.DataRepoMetadata,
            obj_tables.utils.SchemaRepoMetadata,
        ]
        ignore_missing_models = True
        ignore_sheet_order = True

    config = wc_kb.config.core.get_config()['wc_kb']['io']
    lenient = not config['strict']
    if ignore_missing_models is None:
        ignore_missing_models = lenient
    if ignore_extra_models is None:
        ignore_extra_models = lenient
    if ignore_sheet_order is None:
        ignore_sheet_order = lenient
    if ignore_missing_attributes is None:
        ignore_missing_attributes = lenient
    if ignore_extra_attributes is None:
        ignore_extra_attributes = lenient
    if ignore_attribute_order is None:
        ignore_attribute_order = lenient

    # read core objects from file; always grouped, because the checks below index
    # the result by model class
    objects = super(Reader, self).run(
        core_path,
        schema_name='wc_kb.' + taxon,
        models=models,
        ignore_missing_models=ignore_missing_models,
        ignore_extra_models=ignore_extra_models,
        ignore_sheet_order=ignore_sheet_order,
        include_all_attributes=include_all_attributes,
        ignore_missing_attributes=ignore_missing_attributes,
        ignore_extra_attributes=ignore_extra_attributes,
        ignore_attribute_order=ignore_attribute_order,
        group_objects_by_model=True,
        validate=False)

    # check if sequence paths are consistent
    for idx, chromosome in enumerate(objects[wc_kb.core.DnaSpeciesType]):
        if (chromosome.sequence_path is None) or (chromosome.sequence_path == ''):
            # set the sequence path to the one provided to wc_kb.io.Reader()
            chromosome.sequence_path = seq_path
            if idx != 0:
                warnings.warn(
                    'Same sequence file is associated with multiple chromosomes, '
                    'make sure seq file is formatted accordingly!')
        else:
            if chromosome.sequence_path != seq_path:
                # adjacent literals rather than a backslash continuation inside
                # the string, which would embed source indentation in the message
                warnings.warn(
                    'Sequence path ({}) provided in KB file ({}) is different from '
                    'seq_path provided to wc_kb.io.Reader ({}).'.format(
                        chromosome.sequence_path, core_path, seq_path))

    # check that file has 1 knowledge base
    if len(objects[core.KnowledgeBase]) != 1:
        raise ValueError(
            '"{}" should define one knowledge base'.format(core_path))
    kb = objects[core.KnowledgeBase][0]

    # check that file has 0 or 1 cells
    if not objects[core.Cell]:
        cell = None
    elif len(objects[core.Cell]) == 1:
        cell = objects[core.Cell][0]
    else:
        raise ValueError(
            '"{}" should define zero or one cells'.format(core_path))

    # add implicit relationships to `KnowledgeBase` and `Cell`
    kb.cell = cell

    for model, model_objects in objects.items():
        for attr in model.Meta.attributes.values():
            if isinstance(attr, obj_tables.RelatedAttribute) and \
                    attr.related_class == core.Cell:
                for model_obj in model_objects:
                    setattr(model_obj, attr.name, cell)

    # link path to genome sequence to the DNA species types if rewrite_seq_path is True
    # NOTE(review): this dereferences `kb.cell` and the result of `get_one` without
    # checking them, and parses `seq_path` even when it is the default '' -- confirm
    # that callers always provide a cell, a sequence file, and matching record ids
    if rewrite_seq_path:
        for dna in Bio.SeqIO.parse(seq_path, "fasta"):
            species_type = kb.cell.species_types.get_one(id=dna.id)
            species_type.sequence_path = seq_path

    # validate: an explicit `validate` argument wins; `None` defers to the configuration
    if validate or (validate is None and config['validate']):
        objs = []
        for cls_objs in objects.values():
            objs.extend(cls_objs)

        errors = obj_tables.Validator().validate(objs)
        if errors:
            raise ValueError(
                indent_forest([
                    'The knowledge base cannot be loaded because it fails to validate:',
                    [errors]
                ]))

    # if `group_objects_by_model` is False, flatten objects into list
    if not group_objects_by_model:
        flat_objects = []
        for model_objs in objects.values():
            flat_objects.extend(list(model_objs))
        objects = flat_objects

    return objects
def prepare_model(model):
    """ Transform `model` with `PrepForWcSimTransform` and validate it

    Args:
        model: the model passed to `PrepForWcSimTransform().run` and `Validator().run`

    Raises:
        :obj:`ValueError`: if `Validator` reports errors for the model
    """
    PrepForWcSimTransform().run(model)

    problems = Validator().run(model)
    if problems:
        message = indent_forest(['The model is invalid:', [problems]])
        raise ValueError(message)
def run(self, core_path, seq_path, rewrite_seq_path=True, schema=True, strict=True):
    """ Read knowledge base from file(s)

    Args:
        core_path (:obj:`str`): path to core knowledge base
        seq_path (:obj:`str`): path to genome sequence
        rewrite_seq_path (:obj:`bool`, optional): if :obj:`True`, the path to genome sequence
            in the knowledge base will be updated to the provided seq_path
        schema (:obj:`bool`, optional): if :obj:`True`, use model order for prokaryote, else
            use model order for eukaryote
        strict (:obj:`bool`, optional): if :obj:`True`, validate that the model file(s)
            strictly follow the :obj:`obj_model` serialization format:

            * The worksheets are in the expected order
            * There are no missing worksheets
            * There are no extra worksheets
            * The columns are in the expected order
            * There are no missing columns
            * There are no extra columns

    Returns:
        :obj:`core.KnowledgeBase`: knowledge base, or :obj:`None` if the file(s) contain
            no objects

    Raises:
        :obj:`ValueError`: if :obj:`core_path`

            * Defines multiple knowledge bases or cells
            * Represents objects that cannot be linked to a knowledge base and/or cell
            * Defines objects that fail to validate
    """
    Writer.validate_implicit_relationships()

    if schema:
        model_order = PROKARYOTE_MODEL_ORDER
    else:
        model_order = EUKARYOTE_MODEL_ORDER

    # read core objects from file
    _, ext = os.path.splitext(core_path)
    reader = obj_model.io.get_reader(ext)()

    kwargs = {}
    if isinstance(reader, obj_model.io.WorkbookReader):
        kwargs['include_all_attributes'] = False
        if not strict:
            kwargs['ignore_missing_sheets'] = True
            kwargs['ignore_extra_sheets'] = True
            kwargs['ignore_sheet_order'] = True
            kwargs['ignore_missing_attributes'] = True
            kwargs['ignore_extra_attributes'] = True
            kwargs['ignore_attribute_order'] = True
    objects = reader.run(
        core_path, models=model_order, validate=False, **kwargs)

    # check that file has 0 or 1 knowledge bases
    if not objects[core.KnowledgeBase]:
        for model, model_objects in objects.items():
            if model_objects:
                raise ValueError(
                    '"{}" cannot contain instances of `{}` without an instance of '
                    '`KnowledgeBase`'.format(core_path, model.__name__))
        return None
    elif len(objects[core.KnowledgeBase]) > 1:
        raise ValueError(
            '"{}" should define one knowledge base'.format(core_path))
    else:
        # `pop` removes the knowledge base from `objects`, so that the scan for
        # stray objects below does not count it
        kb = objects[core.KnowledgeBase].pop()

    # check that file has 0 or 1 cells
    if not objects[core.Cell]:
        for model, model_objects in objects.items():
            if model_objects:
                raise ValueError(
                    '"{}" cannot contain instances of `{}` without an instance of '
                    '`Cell`'.format(core_path, model.__name__))
        cell = None
    elif len(objects[core.Cell]) > 1:
        raise ValueError('"{}" should define one cell'.format(core_path))
    else:
        cell = objects[core.Cell].pop()

    # add implicit relationships to `KnowledgeBase` and `Cell`
    kb.cell = cell

    for model, model_objects in objects.items():
        for attr in model.Meta.attributes.values():
            if isinstance(attr, obj_model.RelatedAttribute) and \
                    attr.related_class == core.Cell:
                for model_obj in model_objects:
                    setattr(model_obj, attr.name, cell)

    # link path to genome sequence to the DNA species types if rewrite_seq_path is True
    # NOTE(review): `kb.cell` may be None here (a file without a cell is accepted
    # above) and the result of `get_one` is not checked -- confirm that callers
    # only request rewriting for knowledge bases whose cell matches the FASTA file
    if rewrite_seq_path:
        for dna in Bio.SeqIO.parse(seq_path, "fasta"):
            kb.cell.species_types.get_one(id=dna.id).sequence_path = seq_path

    # validate
    # `kb` and `cell` were popped out of `objects` above, so they are added back
    # explicitly to be among the objects handed to the validator
    objs = [kb]
    if cell is not None:
        objs.append(cell)
    for cls_objs in objects.values():
        objs.extend(cls_objs)

    errors = obj_model.Validator().validate(objs)
    if errors:
        raise ValueError(
            indent_forest([
                'The knowledge base cannot be loaded because it fails to validate:',
                [errors]]))

    # return kb
    return kb
def make_test_model(cls, model_type, init_vols=None, init_vol_stds=None, density=1100,
                    molecular_weight=10., charge=0, num_submodels=1,
                    default_species_copy_number=1000000, default_species_std=100000,
                    species_copy_numbers=None, species_stds=None,
                    transfer_reactions=False, transform_prep_and_check=True,
                    submodel_framework='WC:stochastic_simulation_algorithm'):
    """ Create a whole-cell model for testing `wc_sim` software

    Properties of the model:

    * Each submodel uses the integration framework in `submodel_framework`
    * Each submodel has one compartment

    Args:
        model_type (:obj:`str`): model type description
        init_vols (:obj:`list` of :obj:`float`, optional): initial volume of each compartment;
            default=1E-16
        init_vol_stds (:obj:`list` of :obj:`float`, optional): initial std. dev. of volume of
            each compartment; default=`init_vols/10.`
        density (:obj:`float`, optional): the density of each compartment; default=1100 g/l
        molecular_weight (:obj:`float`, optional): the molecular weight of each species type;
            default=10
        charge (:obj:`int`, optional): charge of each species type; default=0
        num_submodels (:obj:`int`, optional): number of submodels
        default_species_copy_number (:obj:`int`, optional): default population of all species
            in their compartments
        default_species_std (:obj:`int`, optional): default standard deviation of population
            of all species in their compartments
        species_copy_numbers (:obj:`dict`, optional): populations for particular species,
            which overrides `default_species_copy_number`
        species_stds (:obj:`dict`, optional): standard deviations for particular species,
            which overrides `default_species_std`
        transfer_reactions (:obj:`bool`, optional): whether the model contains transfer
            reactions between compartments; to be implemented
        transform_prep_and_check (:obj:`bool`, optional): whether to transform, prepare and
            check the model
        submodel_framework (:obj:`str`, optional): the integration framework for submodels;
            default is `WC:stochastic_simulation_algorithm`

    Returns:
        :obj:`Model`: a `wc_lang` model

    Raises:
        :obj:`ValueError`: if arguments are inconsistent, or if the model is checked and
            found to be invalid
    """
    # TODO(Arthur): implement transfer reactions
    num_species, num_reactions, reversible, rate_law_type = \
        cls.get_model_type_params(model_type)

    # reject parameter combinations that this factory cannot build
    if (num_species > 2
            or num_reactions > 1
            or (num_reactions > 0 and num_species == 0)
            or (rate_law_type == RateLawType.product_pop and num_species != 2)):
        raise ValueError(
            "invalid combination of num_species ({}), num_reactions ({}), rate_law_type ({})"
            .format(num_species, num_reactions, rate_law_type.name))

    if num_submodels < 1:
        raise ValueError("invalid num_submodels ({})".format(num_submodels))

    # Model
    model_name = '{} with {} submodels'.format(model_type, num_submodels)
    model = Model(id='test_model', name=model_name, version='0.0.0',
                  wc_lang_version='0.0.1')

    structure = ChemicalStructure(molecular_weight=molecular_weight, charge=charge)

    # make compartments
    default_vol = 1E-16
    if init_vols is None:
        init_vols = [default_vol] * num_submodels
    init_vols = np.asarray(init_vols)
    if init_vol_stds is None:
        init_vol_stds = init_vols / 10.
    else:
        init_vol_stds = np.asarray(init_vol_stds)

    num_vols = len(init_vols)
    num_stds = len(init_vol_stds)
    if num_vols != num_submodels or num_stds != num_submodels:
        raise ValueError(
            "len(init_vols) ({}) or len(init_vol_stds) ({}) != num_submodels ({})"
            .format(num_vols, num_stds, num_submodels))

    # make InitVolumes, which must have unique attributes for round-trip model file equality
    initial_volumes = {}
    for mean, std in zip(init_vols, init_vol_stds):
        key = (mean, std)
        if key not in initial_volumes:
            initial_volumes[key] = InitVolume(
                mean=mean, std=std, units=unit_registry.parse_units('l'))

    compartments = []
    for comp_num, (mean, std) in enumerate(zip(init_vols, init_vol_stds), start=1):
        compartment = model.compartments.create(
            id='compt_{}'.format(comp_num),
            name='compartment num {}'.format(comp_num),
            biological_type=onto['WC:cellular_compartment'],
            init_volume=initial_volumes[(mean, std)])
        compartment.init_density = model.parameters.create(
            id='density_compt_{}'.format(comp_num),
            value=density,
            units=unit_registry.parse_units('g l^-1'))
        compartments.append(compartment)

    # make SpeciesTypes
    species_types = [
        model.species_types.create(
            id='spec_type_{}'.format(index),
            type=onto['WC:protein'],  # protein
            structure=structure)
        for index in range(num_species)
    ]

    # make submodels; `expressions` is one dict shared by all of the calls
    expressions = {}
    for submodel_num, compartment in enumerate(compartments, start=1):
        cls.add_test_submodel(
            model, model_type, submodel_num, compartment, species_types,
            default_species_copy_number=default_species_copy_number,
            default_species_std=default_species_std,
            species_copy_numbers=species_copy_numbers,
            species_stds=species_stds,
            expressions=expressions,
            submodel_framework=submodel_framework)

    if transform_prep_and_check:
        # prepare & check the model
        PrepForWcSimTransform().run(model)
        errors = Validator().run(model)
        if errors:
            raise ValueError(
                indent_forest(['The model is invalid:', [errors]]))

    # create Manager indices
    # TODO(Arthur): should be automated in a finalize() method
    indexed_models = (Submodel, SpeciesType, Reaction, Observable, Compartment, Parameter)
    for base_model in indexed_models:
        base_model.get_manager().insert_all_new()

    return model