Пример #1
0
    def test_indent_forest(self):
        """ Check `indent_forest` on nested lists and on a multi-line string leaf
        """
        # each additional level of list nesting is expected to add 2 spaces of indentation
        nested_forest = [
            '0,1',
            ['1,1', '1,2'],
            '0,2',
            ['1,2', ['2,1', '2,2'], '1,3'],
        ]
        expected_nested = '\n'.join([
            '0,1',
            '  1,1',
            '  1,2',
            '0,2',
            '  1,2',
            '    2,1',
            '    2,2',
            '  1,3',
        ])
        actual_nested = string.indent_forest(nested_forest, indentation=2)
        self.assertEqual(actual_nested, expected_nested)

        # every line of a multi-line leaf is expected to be indented, and the leaf's own
        # leading whitespace is expected to be preserved
        text_forest = [
            '0,1',
            ["e e cummings\ncould write\n   but couldn't code"],
            '0,2',
        ]
        expected_text = '\n'.join([
            '0,1',
            '   e e cummings',
            '   could write',
            "      but couldn't code",
            '0,2',
        ])
        actual_text = string.indent_forest(text_forest, indentation=3)
        self.assertEqual(actual_text, expected_text)
Пример #2
0
 def test_indent_forest_with_trailing_blanks(self):
     """ Check that trailing blank lines are kept only when `keep_trailing_blank_lines` is set
     """
     first_text = 'test_text1\ntest_text2\n\ntest_text4\n   \n'
     second_text = 'test_text5\ntest_text6'
     trees = [first_text, second_text]

     # when requested, the trailing blank lines of the first entry are retained
     kept = string.indent_forest(trees, keep_trailing_blank_lines=True, indentation=0)
     self.assertEqual(kept, first_text + '\n' + second_text)

     # by default, the trailing blank lines are removed
     stripped = string.indent_forest(trees, indentation=0)
     self.assertEqual(stripped, first_text.rstrip() + '\n' + second_text)
Пример #3
0
    def post(self):
        """ Validate that a workbook is consistent with a schema, and report any errors

        The uploaded schema and workbook are saved to directories which are removed
        before this method returns or aborts, even if saving the workbook or reading
        it fails.

        Returns:
            :obj:`str`: message that either lists the validation errors or states that
                the dataset is valid
        """
        args = validate_parser.parse_args()

        schema_dir, schema_filename = save_schema(args['schema'])
        try:
            # saved inside the outer `try` so that the schema directory is still
            # removed if saving the workbook fails
            wb_dir, wb_filename = save_in_workbook(args['workbook'])
            try:
                schema_name, schema, models = get_schema_models(schema_filename)
                objs = io.Reader().run(wb_filename,
                                       schema_name=schema_name,
                                       models=models,
                                       group_objects_by_model=False,
                                       validate=False,
                                       **DEFAULT_READER_ARGS)
            except Exception as err:
                # report any problem with the schema or workbook as a bad request
                flask_restplus.abort(400, str(err))
            finally:
                shutil.rmtree(wb_dir)
        finally:
            shutil.rmtree(schema_dir)

        errors = core.Validator().validate(objs)
        if errors:
            msg = indent_forest(['The dataset is invalid:', [errors]])
        else:
            msg = 'The dataset is valid'

        return msg
Пример #4
0
 def _prepare(self):
     """ Transform the model with `PrepForWcSimTransform` and check that it is valid

     Raises:
         :obj:`MultialgorithmError`: if the validator reports errors for the model
     """
     # transform the model in preparation for simulation
     PrepForWcSimTransform().run(self.model)

     # fail with an indented report if the transformed model does not validate
     validation_errors = Validator().run(self.model)
     if validation_errors:
         report = indent_forest(['The model is invalid:', [validation_errors]])
         raise MultialgorithmError(report)
Пример #5
0
 def test_indent_forest_with_return_list(self):
     """ Check that `return_list=True` produces the indented lines as a list of strings
     """
     text_forest = [
         '0,1',
         ["e e cummings\ncould write\n   but couldn't code"],
         '0,2',
     ]
     # one entry per output line; the multi-line leaf is expected to be split into lines
     expected_lines = [
         '0,1',
         '   e e cummings',
         '   could write',
         "      but couldn't code",
         '0,2',
     ]
     actual_lines = string.indent_forest(text_forest, indentation=3, return_list=True)
     self.assertEqual(actual_lines, expected_lines)
Пример #6
0
    def run(self,
            path,
            models=None,
            ignore_missing_models=None,
            ignore_sheet_order=None,
            include_all_attributes=False,
            ignore_missing_attributes=None,
            ignore_extra_attributes=None,
            ignore_attribute_order=None,
            group_objects_by_model=True,
            validate=None):
        """ Read a list of model objects from file(s) and, optionally, validate them

        Args:
            path (:obj:`str`): path to file(s)
            models (:obj:`types.TypeType` or :obj:`list` of :obj:`types.TypeType`, optional): type
                of object to read or list of types of objects to read; defaults to `self.MODELS`
            ignore_missing_models (:obj:`bool`, optional): if :obj:`False`, report an error if a worksheet/
                file is missing for one or more models; defaults to the opposite of the `strict`
                configuration option
            ignore_sheet_order (:obj:`bool`, optional): if :obj:`True`, do not require the sheets to be provided
                in the canonical order; defaults to the opposite of the `strict` configuration option
            include_all_attributes (:obj:`bool`, optional): if :obj:`True`, export all attributes including those
                not explicitly included in `Model.Meta.attribute_order`
            ignore_missing_attributes (:obj:`bool`, optional): if :obj:`False`, report an error if a
                worksheet/file doesn't contain all of attributes in a model in `models`; defaults to
                the opposite of the `strict` configuration option
            ignore_extra_attributes (:obj:`bool`, optional): if :obj:`True`, do not report errors if
                attributes in the data are not in the model; defaults to the opposite of the `strict`
                configuration option
            ignore_attribute_order (:obj:`bool`, optional): if :obj:`True`, do not require the attributes
                to be provided in the canonical order; defaults to the opposite of the `strict`
                configuration option
            group_objects_by_model (:obj:`bool`, optional): if :obj:`True`, group decoded objects by their
                types
            validate (:obj:`bool`, optional): if :obj:`True`, validate the data; defaults to the
                `validate` configuration option

        Returns:
            :obj:`dict` or :obj:`list`: model objects grouped by `obj_tables.Model` class if
                `group_objects_by_model` is :obj:`True`; otherwise, a flat list of the model objects

        Raises:
            :obj:`ValueError`: if the file defines zero or multiple models or the model defined in the file(s) is
                invalid
        """
        is_workbook = issubclass(self.get_reader(path), obj_tables.io.WorkbookReader)
        if is_workbook:
            Writer.validate_implicit_relationships()

        if models is None:
            models = self.MODELS

        # options which were not set explicitly follow the `strict` configuration option
        config = wc_lang.config.core.get_config()['wc_lang']['io']
        lenient = not config['strict']
        if ignore_missing_models is None:
            ignore_missing_models = lenient
        if ignore_sheet_order is None:
            ignore_sheet_order = lenient
        if ignore_missing_attributes is None:
            ignore_missing_attributes = lenient
        if ignore_extra_attributes is None:
            ignore_extra_attributes = lenient
        if ignore_attribute_order is None:
            ignore_attribute_order = lenient

        # always read the objects grouped by model because the checks below index the
        # result by model class; the result is flattened at the end if requested.
        # validation is deferred until the implicit relationships have been added
        objects = super(Reader, self).run(
            path,
            models=models,
            ignore_missing_models=ignore_missing_models,
            ignore_sheet_order=ignore_sheet_order,
            include_all_attributes=include_all_attributes,
            ignore_missing_attributes=ignore_missing_attributes,
            ignore_extra_attributes=ignore_extra_attributes,
            ignore_attribute_order=ignore_attribute_order,
            group_objects_by_model=True,
            validate=False)

        # check that file only has 1 model
        if len(objects[core.Model]) != 1:
            raise ValueError('"{}" should define one model'.format(path))
        model = objects[core.Model][0]

        # add implicit relationships to `Model`
        if is_workbook:
            for cls, cls_objects in objects.items():
                for attr in cls.Meta.attributes.values():
                    if isinstance(attr, obj_tables.RelatedAttribute) and \
                            attr.related_class == core.Model:
                        for cls_obj in cls_objects:
                            setattr(cls_obj, attr.name, model)

        # validate, falling back to the configuration when `validate` was not set
        if validate or (validate is None and config['validate']):
            objs = []
            for cls_objs in objects.values():
                objs.extend(cls_objs)

            errors = obj_tables.Validator().validate(objs)
            if errors:
                raise ValueError(
                    indent_forest([
                        'The model cannot be loaded because it fails to validate:',
                        [errors]
                    ]))

        # if `group_objects_by_model` is False, flatten objects into list
        if not group_objects_by_model:
            flat_objects = []
            for cls_objs in objects.values():
                flat_objects.extend(cls_objs)
            objects = flat_objects

        return objects
Пример #7
0
 def test_indent_forest_with_string(self):
     """ Check that a bare string passed as the forest is returned unchanged
     """
     bare_string = 'forest'
     indented = string.indent_forest(bare_string, indentation=3)
     self.assertEqual(indented, 'forest')
Пример #8
0
    def run(self,
            core_path,
            seq_path='',
            rewrite_seq_path=True,
            taxon='prokaryote',
            models=None,
            ignore_missing_models=None,
            ignore_extra_models=None,
            ignore_sheet_order=None,
            include_all_attributes=False,
            ignore_missing_attributes=None,
            ignore_extra_attributes=None,
            ignore_attribute_order=None,
            group_objects_by_model=True,
            validate=True,
            read_metadata=False):
        """ Read knowledge base from file(s)

        Args:
            core_path (:obj:`str`): path to core knowledge base
            seq_path (:obj:`str`): path to genome sequence
            rewrite_seq_path (:obj:`bool`, optional): if :obj:`True`, the path to genome sequence in the knowledge base
                will be updated to the provided seq_path
            taxon (:obj:`str`, optional): type of model order to use (`prokaryote` or `eukaryote`)
            models (:obj:`types.TypeType` or :obj:`list` of :obj:`types.TypeType`, optional): type
                of object to read or list of types of objects to read; note that this value is
                currently replaced by the list of models selected by `taxon`
            ignore_missing_models (:obj:`bool`, optional): if :obj:`False`, report an error if a worksheet/
                file is missing for one or more models; forced to :obj:`True` if `read_metadata` is
                :obj:`True`
            ignore_extra_models (:obj:`bool`, optional): if :obj:`True` and all `models` are found, ignore
                other worksheets or files
            ignore_sheet_order (:obj:`bool`, optional): if :obj:`True`, do not require the sheets to be provided
                in the canonical order; forced to :obj:`True` if `read_metadata` is :obj:`True`
            include_all_attributes (:obj:`bool`, optional): if :obj:`True`, export all attributes including those
                not explicitly included in `Model.Meta.attribute_order`
            ignore_missing_attributes (:obj:`bool`, optional): if :obj:`False`, report an error if a
                worksheet/file doesn't contain all of attributes in a model in `models`
            ignore_extra_attributes (:obj:`bool`, optional): if :obj:`True`, do not report errors if
                attributes in the data are not in the model
            ignore_attribute_order (:obj:`bool`, optional): if :obj:`True`, do not require the attributes
                to be provided in the canonical order
            group_objects_by_model (:obj:`bool`, optional): if :obj:`True`, group decoded objects by their
                types
            validate (:obj:`bool`, optional): if :obj:`True`, validate the data; if :obj:`None`, use the
                `validate` configuration option
            read_metadata (:obj:`bool`, optional): if :obj:`True`, read metadata models

        The `ignore_*` options which are :obj:`None` default to the opposite of the `strict`
        configuration option.

        Returns:
            :obj:`dict` or :obj:`list`: model objects grouped by `obj_tables.Model` class if
                `group_objects_by_model` is :obj:`True`; otherwise, a flat list of the model objects

        Raises:
            :obj:`ValueError`: if `taxon` is not supported, if the data fails to validate, or
                if :obj:`core_path`

                * Does not define exactly one knowledge base
                * Defines multiple cells
        """
        if issubclass(self.get_reader(core_path),
                      obj_tables.io.WorkbookReader):
            Writer.validate_implicit_relationships()

        # the taxon determines the models which are read
        if taxon == 'prokaryote':
            models = PROKARYOTE_MODELS
        elif taxon == 'eukaryote':
            models = EUKARYOTE_MODELS
        else:
            raise ValueError('Unsupported taxon "{}"'.format(taxon))

        if read_metadata:
            models = list(models) + [
                obj_tables.utils.DataRepoMetadata,
                obj_tables.utils.SchemaRepoMetadata
            ]
            ignore_missing_models = True
            ignore_sheet_order = True

        # options which were not set explicitly follow the `strict` configuration option
        config = wc_kb.config.core.get_config()['wc_kb']['io']
        lenient = not config['strict']
        if ignore_missing_models is None:
            ignore_missing_models = lenient
        if ignore_extra_models is None:
            ignore_extra_models = lenient
        if ignore_sheet_order is None:
            ignore_sheet_order = lenient
        if ignore_missing_attributes is None:
            ignore_missing_attributes = lenient
        if ignore_extra_attributes is None:
            ignore_extra_attributes = lenient
        if ignore_attribute_order is None:
            ignore_attribute_order = lenient

        # read core objects from file; the objects are always read grouped by model because
        # the checks below index the result by model class, and validation is deferred until
        # the implicit relationships have been added
        objects = super(Reader, self).run(
            core_path,
            schema_name='wc_kb.' + taxon,
            models=models,
            ignore_missing_models=ignore_missing_models,
            ignore_extra_models=ignore_extra_models,
            ignore_sheet_order=ignore_sheet_order,
            include_all_attributes=include_all_attributes,
            ignore_missing_attributes=ignore_missing_attributes,
            ignore_extra_attributes=ignore_extra_attributes,
            ignore_attribute_order=ignore_attribute_order,
            group_objects_by_model=True,
            validate=False)

        # check that the sequence paths are consistent
        for idx, chromosome in enumerate(objects[wc_kb.core.DnaSpeciesType]):
            if chromosome.sequence_path is None or chromosome.sequence_path == '':
                # fall back to the sequence path provided to this method
                chromosome.sequence_path = seq_path
                if idx != 0:
                    warnings.warn(
                        'Same sequence file is associated with multiple chromosomes, '
                        'make sure seq file is formatted accordingly!')
            elif chromosome.sequence_path != seq_path:
                warnings.warn(
                    'Sequence path ({}) provided in KB file ({}) is different from '
                    'seq_path provided to wc_kb.io.Reader ({}).'.format(
                        chromosome.sequence_path, core_path, seq_path))

        # check that file has 1 knowledge base
        if len(objects[core.KnowledgeBase]) != 1:
            raise ValueError(
                '"{}" should define one knowledge base'.format(core_path))
        kb = objects[core.KnowledgeBase][0]

        # check that file has 0 or 1 cells
        if not objects[core.Cell]:
            cell = None
        elif len(objects[core.Cell]) == 1:
            cell = objects[core.Cell][0]
        else:
            raise ValueError(
                '"{}" should define zero or one cells'.format(core_path))

        # add implicit relationships to `KnowledgeBase` and `Cell`
        kb.cell = cell

        for model, model_objects in objects.items():
            for attr in model.Meta.attributes.values():
                if isinstance(attr, obj_tables.RelatedAttribute
                              ) and attr.related_class == core.Cell:
                    for model_obj in model_objects:
                        setattr(model_obj, attr.name, cell)

        # link path to genome sequence to the DNA species types if rewrite_seq_path is True;
        # skipped when the file defines no cell because the species types are looked up
        # through the cell
        if rewrite_seq_path and cell is not None:
            for dna in Bio.SeqIO.parse(seq_path, "fasta"):
                species_type = cell.species_types.get_one(id=dna.id)
                species_type.sequence_path = seq_path

        # validate, falling back to the configuration when `validate` is None
        if validate or (validate is None and config['validate']):
            objs = []
            for cls_objs in objects.values():
                objs.extend(cls_objs)

            errors = obj_tables.Validator().validate(objs)
            if errors:
                raise ValueError(
                    indent_forest([
                        'The knowledge base cannot be loaded because it fails to validate:',
                        [errors]
                    ]))

        # if `group_objects_by_model` is False, flatten objects into list
        if not group_objects_by_model:
            flat_objects = []
            for model_objs in objects.values():
                flat_objects.extend(model_objs)
            objects = flat_objects

        return objects
Пример #9
0
def prepare_model(model):
    """ Transform a model with `PrepForWcSimTransform` and check that it is valid

    Args:
        model: model to transform in place and validate

    Raises:
        :obj:`ValueError`: if the validator reports errors for the model
    """
    PrepForWcSimTransform().run(model)

    validation_errors = Validator().run(model)
    if validation_errors:
        report = indent_forest(['The model is invalid:', [validation_errors]])
        raise ValueError(report)
Пример #10
0
    def run(self, core_path, seq_path, rewrite_seq_path=True, schema=True, strict=True):
        """ Read knowledge base from file(s)

        Args:
            core_path (:obj:`str`): path to core knowledge base
            seq_path (:obj:`str`): path to genome sequence
            rewrite_seq_path (:obj:`bool`, optional): if :obj:`True`, the path to genome sequence in the knowledge base
                will be updated to the provided seq_path
            schema (:obj:`bool`, optional): if :obj:`True`, use model order for prokaryote, else use model order for eukaryote
            strict (:obj:`bool`, optional): if :obj:`True`, validate that the model file(s) strictly follow the
                :obj:`obj_model` serialization format:

                * The worksheets are in the expected order
                * There are no missing worksheets
                * There are no extra worksheets
                * The columns are in the expected order
                * There are no missing columns
                * There are no extra columns

        Returns:
            :obj:`core.KnowledgeBase`: knowledge base, or :obj:`None` if the file(s) contain no objects

        Raises:
            :obj:`ValueError`: if the knowledge base fails to validate, or if :obj:`core_path`

                * Defines multiple knowledge bases or cells
                * Represents objects that cannot be linked to a knowledge base and/or cell
        """
        Writer.validate_implicit_relationships()

        if schema:
            model_order = PROKARYOTE_MODEL_ORDER
        else:
            model_order = EUKARYOTE_MODEL_ORDER

        # read core objects from file
        _, ext = os.path.splitext(core_path)
        reader = obj_model.io.get_reader(ext)()

        # the leniency options are only passed to workbook readers
        kwargs = {}
        if isinstance(reader, obj_model.io.WorkbookReader):
            kwargs['include_all_attributes'] = False
            if not strict:
                kwargs['ignore_missing_sheets'] = True
                kwargs['ignore_extra_sheets'] = True
                kwargs['ignore_sheet_order'] = True
                kwargs['ignore_missing_attributes'] = True
                kwargs['ignore_extra_attributes'] = True
                kwargs['ignore_attribute_order'] = True

        # validation is deferred until the implicit relationships have been added
        objects = reader.run(
            core_path, models=model_order, validate=False, **kwargs)

        # check that file has 0 or 1 knowledge bases
        if not objects[core.KnowledgeBase]:
            for model, model_objects in objects.items():
                if model_objects:
                    raise ValueError('"{}" cannot contain instances of `{}` without an instance of `KnowledgeBase`'.format(
                        core_path, model.__name__))
            return None

        elif len(objects[core.KnowledgeBase]) > 1:
            raise ValueError(
                '"{}" should define one knowledge base'.format(core_path))

        else:
            # note: this removes the knowledge base from `objects`
            kb = objects[core.KnowledgeBase].pop()

        # check that file has 0 or 1 cells
        if not objects[core.Cell]:
            for model, model_objects in objects.items():
                if model_objects:
                    raise ValueError('"{}" cannot contain instances of `{}` without an instance of `Cell`'.format(
                        core_path, model.__name__))
            cell = None

        elif len(objects[core.Cell]) > 1:
            raise ValueError('"{}" should define one cell'.format(core_path))

        else:
            # note: this removes the cell from `objects`
            cell = objects[core.Cell].pop()

        # add implicit relationships to `KnowledgeBase` and `Cell`
        kb.cell = cell

        for model, model_objects in objects.items():
            for attr in model.Meta.attributes.values():
                if isinstance(attr, obj_model.RelatedAttribute) and attr.related_class == core.Cell:
                    for model_obj in model_objects:
                        setattr(model_obj, attr.name, cell)

        # link path to genome sequence to the DNA species types if rewrite_seq_path is True;
        # skipped when the file defines no cell: in that case the check above guarantees
        # that there are no other objects, so there are no species types to update
        if rewrite_seq_path and cell is not None:
            for dna in Bio.SeqIO.parse(seq_path, "fasta"):
                cell.species_types.get_one(id=dna.id).sequence_path = seq_path

        # validate
        objs = []
        for cls_objs in objects.values():
            objs.extend(cls_objs)

        errors = obj_model.Validator().validate(objs)
        if errors:
            raise ValueError(
                indent_forest(['The knowledge base cannot be loaded because it fails to validate:', [errors]]))

        # return kb
        return kb
Пример #11
0
    def make_test_model(
            cls,
            model_type,
            init_vols=None,
            init_vol_stds=None,
            density=1100,
            molecular_weight=10.,
            charge=0,
            num_submodels=1,
            default_species_copy_number=1000000,
            default_species_std=100000,
            species_copy_numbers=None,
            species_stds=None,
            transfer_reactions=False,
            transform_prep_and_check=True,
            submodel_framework='WC:stochastic_simulation_algorithm'):
        """ Build a small whole-cell model which can be used to test `wc_sim`

        The model contains `num_submodels` submodels. Each submodel has its own compartment
        and uses the integration framework given by `submodel_framework`.

        Args:
            model_type (:obj:`str`): description of the type of model to make
            init_vols (:obj:`list` of :obj:`float`, optional): initial volume of each compartment;
                each volume defaults to 1E-16
            init_vol_stds (:obj:`list` of :obj:`float`, optional): standard deviation of the initial
                volume of each compartment; defaults to `init_vols/10.`
            density (:obj:`float`, optional): density of each compartment, in g/l; default=1100
            molecular_weight (:obj:`float`, optional): molecular weight of each species type; default=10
            charge (:obj:`int`, optional): charge of each species type; default=0
            num_submodels (:obj:`int`, optional): number of submodels; must be at least 1
            default_species_copy_number (:obj:`int`, optional): default population of all species in
                their compartments
            default_species_std (:obj:`int`, optional): default standard deviation of the population
                of all species in their compartments
            species_copy_numbers (:obj:`dict`, optional): populations of particular species, which
                override `default_species_copy_number`
            species_stds (:obj:`dict`, optional): standard deviations of particular species, which
                override `default_species_std`
            transfer_reactions (:obj:`bool`, optional): whether the model contains transfer reactions
                between compartments; not implemented yet
            transform_prep_and_check (:obj:`bool`, optional): whether to transform, prepare and check
                the model
            submodel_framework (:obj:`str`, optional): the integration framework for submodels; default is
                `WC:stochastic_simulation_algorithm`

        Returns:
            :obj:`Model`: a `wc_lang` model

        Raises:
            :obj:`ValueError`: if the arguments are inconsistent or the prepared model is invalid
        """
        # TODO(Arthur): implement transfer reactions
        model_type_params = cls.get_model_type_params(model_type)
        num_species, num_reactions, reversible, rate_law_type = model_type_params

        # reject combinations of model type parameters which are not supported
        too_many_species = num_species > 2
        too_many_reactions = num_reactions > 1
        reaction_without_species = num_reactions > 0 and num_species == 0
        product_pop_without_two_species = \
            rate_law_type == RateLawType.product_pop and num_species != 2
        if too_many_species or too_many_reactions or reaction_without_species \
                or product_pop_without_two_species:
            raise ValueError(
                "invalid combination of num_species ({}), num_reactions ({}), rate_law_type ({})"
                .format(num_species, num_reactions, rate_law_type.name))

        if num_submodels < 1:
            raise ValueError(
                "invalid num_submodels ({})".format(num_submodels))

        # Model
        model_name = '{} with {} submodels'.format(model_type, num_submodels)
        model = Model(id='test_model',
                      name=model_name,
                      version='0.0.0',
                      wc_lang_version='0.0.1')

        # chemical structure shared by all of the species types
        shared_structure = ChemicalStructure(molecular_weight=molecular_weight,
                                             charge=charge)

        # determine the initial volume, and its standard deviation, of each compartment
        default_vol = 1E-16
        if init_vols is None:
            init_vols = [default_vol] * num_submodels
        init_vols = np.asarray(init_vols)
        if init_vol_stds is None:
            init_vol_stds = init_vols / 10.
        else:
            init_vol_stds = np.asarray(init_vol_stds)
        num_vols = len(init_vols)
        num_vol_stds = len(init_vol_stds)
        if not (num_vols == num_submodels and num_vol_stds == num_submodels):
            raise ValueError(
                "len(init_vols) ({}) or len(init_vol_stds) ({}) != num_submodels ({})"
                .format(num_vols, num_vol_stds, num_submodels))

        # make InitVolumes, which must have unique attributes for round-trip model file equality
        volume_attrs = [(init_vols[idx], init_vol_stds[idx])
                        for idx in range(num_submodels)]
        init_volume_by_attrs = {}
        for mean, std in volume_attrs:
            if (mean, std) not in init_volume_by_attrs:
                init_volume_by_attrs[(mean, std)] = InitVolume(
                    mean=mean,
                    std=std,
                    units=unit_registry.parse_units('l'))

        # make one compartment, with its own density parameter, per submodel
        compartments = []
        for comp_num, attrs in enumerate(volume_attrs, start=1):
            compartment = model.compartments.create(
                id='compt_{}'.format(comp_num),
                name='compartment num {}'.format(comp_num),
                biological_type=onto['WC:cellular_compartment'],
                init_volume=init_volume_by_attrs[attrs])
            compartment.init_density = model.parameters.create(
                id='density_compt_{}'.format(comp_num),
                value=density,
                units=unit_registry.parse_units('g l^-1'))
            compartments.append(compartment)

        # make SpeciesTypes, all of which are proteins
        species_types = [
            model.species_types.create(
                id='spec_type_{}'.format(species_idx),
                type=onto['WC:protein'],
                structure=shared_structure)
            for species_idx in range(num_species)
        ]

        # make submodels; the same `expressions` dict is passed to every submodel
        expressions = {}
        for submodel_num, compartment in enumerate(compartments, start=1):
            cls.add_test_submodel(
                model,
                model_type,
                submodel_num,
                compartment,
                species_types,
                default_species_copy_number=default_species_copy_number,
                default_species_std=default_species_std,
                species_copy_numbers=species_copy_numbers,
                species_stds=species_stds,
                expressions=expressions,
                submodel_framework=submodel_framework)

        if transform_prep_and_check:
            # prepare & check the model
            PrepForWcSimTransform().run(model)
            validation_errors = Validator().run(model)
            if validation_errors:
                report = indent_forest(['The model is invalid:', [validation_errors]])
                raise ValueError(report)

        # create Manager indices
        # TODO(Arthur): should be automated in a finalize() method
        indexed_models = (Submodel, SpeciesType, Reaction, Observable, Compartment,
                          Parameter)
        for base_model in indexed_models:
            base_model.get_manager().insert_all_new()

        return model