Exemplos de sanitize_type em Python, exemplos de insanity.sanitize_type em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: ordered_set.py Projeto: phohenecker/rel-data

    def __init__(self,
                 element_type: typing.Type[T],
                 index_func: typing.Callable[[T], int],
                 data: typing.Iterable[T] = None):
        """Creates a new ``OrderedSet``, which is optionally populated with the provided data.

        Args:
            element_type (type): The type of the elements that will be added to the newly created set.
            index_func (function): A function that maps instances of type ``element_type`` to (unique) integer indices.
            data (Iterable, optional): An optional iterable that specifies data to add to the newly created set.

        Raises:
            TypeError: If ``index_func`` is not callable.
        """
        # the ABC aliases in ``collections`` were removed in Python 3.10, hence the ABC is taken from
        # ``collections.abc`` explicitly
        import collections.abc

        # sanitize args
        insanity.sanitize_type("element_type", element_type, type)
        if not callable(index_func):
            raise TypeError("The parameter <index_func> has to be callable!")
        insanity.sanitize_type("data",
                               data,
                               collections.abc.Iterable,
                               none_allowed=True)

        # define attributes
        self._data = []  # a list that stores the data orderly
        self._element_type = element_type  # the type of elements of the set
        self._index_func = index_func  # a function that maps elements to indices
        self._len = 0  # the length of self._data
        self._num_elements = 0  # the actual number of elements in self._data (without Nones)
        self._observers = []  # a list of all registered observers of an OrderedSet

        # add provided data
        if data is not None:
            self.add_all(data)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: value_spec.py Projeto: phohenecker/arg-magiq

    def __init__(self, name: str, description: str, data_type: type,
                 required: bool, default_value: typing.Any):
        """Creates a new instance from the provided specification of a configuration value.

        ``name`` and ``description`` are converted into strings, and ``required`` is converted into a ``bool`` before
        the values are stored.

        Args:
            Cf. the documentation of the according properties.

        Raises:
            ValueError: If ``name`` is the empty string, or if ``data_type`` is ``bool`` and ``default_value`` is
                ``None``.
        """

        # normalize and check the name
        name = str(name)
        if not name:
            raise ValueError("<name> cannot be the empty string")

        # normalize the description
        description = str(description)

        # check the data type, and ensure that booleans come with a default value
        insanity.sanitize_type("data_type", data_type, type)
        bool_without_default = data_type == bool and default_value is None
        if bool_without_default:
            raise ValueError(
                "For <data_type> bool, a default value is required")

        # normalize the required-flag
        required = bool(required)

        # keep the sanitized values
        self._name = name
        self._description = description
        self._data_type = data_type
        self._required = required
        self._default_value = default_value

Exemplo n.º 3

0

Exibir arquivo

Arquivo: config.py Projeto: phohenecker/family-tree-data-gen

 def max_branching_factor(self, max_branching_factor: int) -> None:
     """Validates and stores the maximum branching factor.

     Args:
         max_branching_factor (int): The new value, which is type-checked against ``int`` and range-checked with a
             minimum of 1.
     """
     arg_name = "max_branching_factor"

     # check the type first, then the admissible range
     insanity.sanitize_type(arg_name, max_branching_factor, int)
     insanity.sanitize_range(arg_name, max_branching_factor, minimum=1)

     self._max_branching_factor = max_branching_factor

Exemplo n.º 4

0

Exibir arquivo

    def __init__(self,
                 literal: literal_type.LiteralType,
                 value,
                 inferred: bool = False,
                 prediction: bool = False):
        """Creates a new ``LiteralValue``, which pairs a type of literal with an according value.

        Args:
            literal (:class:`literal_type.LiteralType`): Specifies :attr:`literal`.
            value: Specifies :attr:`value`.
            inferred (bool, optional): Specifies :attr:`inferred`.
            prediction (bool, optional): Specifies :attr:`prediction`.

        Raises:
            ValueError: If both ``inferred`` and ``prediction`` are truthy.
        """
        # check the type of the literal
        insanity.sanitize_type("literal", literal, literal_type.LiteralType)

        # the two flags are mutually exclusive
        if inferred and prediction:
            message = "A literal cannot be an inference and a prediction at the same time!"
            raise ValueError(message)

        # store the (normalized) args
        self._literal = literal
        self._value = value
        self._inferred = bool(inferred)
        self._prediction = bool(prediction)

Exemplo n.º 5

0

Exibir arquivo

    def __init__(self,
                 message: str,
                 decimal_places: int = 3,
                 terminal_break: bool = False,
                 skip_output: bool = False):
        """Creates a new instance of ``Timer``.

        Args:
            message (str): The message to print at the end of the ``with`` block. Notice that the measured time, as
                string ``" in X.XXXs"``, is appended to the provided message automatically.
            decimal_places (int, optional): The number of decimal places to print for the time, which is measured and
                printed in seconds.
            terminal_break (bool, optional): Indicates whether to add an additional line break to the printed message.
            skip_output (bool, optional): If ``True``, then no output is printed to the screen.
        """

        # validate the number of decimal places
        insanity.sanitize_type("decimal_places", decimal_places, int)
        insanity.sanitize_range("decimal_places", decimal_places, minimum=0)

        # assemble the template of the message that is printed at the end of the with-block; the placeholder for the
        # measured time is left in for a later call of str.format
        time_suffix = " in {:.%df}s" % decimal_places
        template = str(message).strip() + time_suffix
        if terminal_break:
            template += "\n"
        self._message = template

        # the time when the clock is started
        self._start = None

        # the total time measured
        self._total = 0

        # indicates whether to print the time at the end of a ``with`` block
        self._skip_output = bool(skip_output)

Exemplo n.º 6

0

Exibir arquivo

Arquivo: kg_reader.py Projeto: phohenecker/rel-data

    def read_all_sequences(
            cls,
            input_dir: str,
            executor: futures.Executor = None
    ) -> typing.List[typing.List[knowledge_graph.KnowledgeGraph]]:
        """Reads every knowledge-graph sequence that is found in the specified directory.

        Args:
            input_dir (str): The path of the directory that is being searched.
            executor (futures.Executor, optional): An optional executor for loading multiple knowledge-graph sequences
                concurrently.

        Returns:
            list[list[:class:`knowledge_graph.KnowledgeGraph`]]: All knowledge-graph sequences that were found in
                ``input_dir``.

        Raises:
            ValueError: If the specified directory does not exist.
        """
        # validate the provided args
        input_dir = str(input_dir)
        if not os.path.isdir(input_dir):
            raise ValueError("The specified <input_dir> does not exist: '{}'!".format(input_dir))
        insanity.sanitize_type("executor", executor, futures.Executor, none_allowed=True)

        # discover the sequences that are stored in the input directory
        sequences = io.find_knowledge_graph_sequences(input_dir)

        # with an executor, the sequences are loaded via its map method from their full paths
        if executor is not None:
            seq_paths = [os.path.join(input_dir, seq) for seq in sequences]
            return list(executor.map(cls._read_seq_from_one, seq_paths))

        # otherwise, the sequences are loaded one after another
        loaded = []
        for seq in sequences:
            loaded.append(cls.read_sequence(input_dir, seq))
        return loaded

Exemplo n.º 7

0

Exibir arquivo

    def __init__(self, facts: typing.Iterable[literal.Literal],
                 inferences: typing.Iterable[literal.Literal]):
        """Creates a new instance of ``AnswerSet``.

        Both args are converted into sets before they are stored.

        Args:
            facts (Iterable[:class:`literal.Literal`]): The facts contained in the answer set.
            inferences (Iterable[:class:`literal.Literal`]): The inferences contained in the answer set.

        Raises:
            TypeError: If any of ``facts`` and ``inferences`` is not an ``Iterable`` of instances of type
                :class:`literal.Literal`.
        """
        # the ABC aliases in ``collections`` were removed in Python 3.10, hence the ABC is taken from
        # ``collections.abc`` explicitly
        import collections.abc

        # sanitize args
        insanity.sanitize_type("facts", facts, collections.abc.Iterable)
        facts = set(facts)
        insanity.sanitize_iterable("facts",
                                   facts,
                                   elements_type=literal.Literal)
        insanity.sanitize_type("inferences", inferences, collections.abc.Iterable)
        inferences = set(inferences)
        insanity.sanitize_iterable("inferences",
                                   inferences,
                                   elements_type=literal.Literal)

        # define attributes
        self._facts = facts
        self._inferences = inferences

Exemplo n.º 8

0

Exibir arquivo

Arquivo: config.py Projeto: phohenecker/family-tree-data-gen

 def stop_prob(self, stop_prob: numbers.Real) -> None:
     """Validates the provided stop probability, and stores it as a ``float``.

     Args:
         stop_prob (numbers.Real): The new value, which is range-checked with a minimum of 0 and a maximum of 1 that
             is flagged as not inclusive.
     """
     arg_name = "stop_prob"

     # check the type first, then the admissible range
     insanity.sanitize_type(arg_name, stop_prob, numbers.Real)
     insanity.sanitize_range(arg_name, stop_prob, minimum=0, maximum=1, max_inclusive=False)

     self._stop_prob = float(stop_prob)

Exemplo n.º 9

0

Exibir arquivo

Arquivo: default_parser_factory.py Projeto: phohenecker/arg-magic

    def create_parser(
            self, parser: argparse.ArgumentParser,
            config: config_value.ConfigValue) -> argparse.ArgumentParser:
        """Adds a command-line arg for the provided configuration value to the given parser.

        Args:
            parser (argparse.ArgumentParser): The parser that the new arg is added to.
            config (:class:`config_value.ConfigValue`): The configuration value to create an arg for.

        Returns:
            argparse.ArgumentParser: The provided ``parser``.

        Raises:
            ValueError: If the type of the provided ``config`` is not supported by the ``DefaultParserFactory``.
        """
        # sanitize args
        insanity.sanitize_type("parser", parser, argparse.ArgumentParser)
        insanity.sanitize_type("config", config, config_value.ConfigValue)

        # check if the type of the provided config is supported
        data_type = config.data_type
        if not issubclass(data_type, enum.Enum) and data_type not in self.SUPPORTED_TYPES:
            raise ValueError(
                "The data type of the provided <config> is not supported: {}!".
                format(data_type.__qualname__))

        # the names of the command line args are simply those of the corresponding config values where
        # underscores are replaced with dashes
        base_name = config.name.replace("_", "-")

        # an arg is positional only if positional args are enabled and the config value is required
        positional = self._positional_args and config.required

        if data_type == bool:
            # booleans are always exposed as options (notice that they are required to have default values), even if
            # the config is required and positional args are enabled: a store_const action cannot be positional, and
            # slicing an un-prefixed name would mangle the option name
            # if the default value is True, then the name of the according option starts with "--no-"
            flag = ("--no-" if config.default_value else "--") + base_name
            parser.add_argument(flag,
                                dest=config.name,
                                action="store_const",
                                const=not config.default_value,
                                default=config.default_value,
                                help=config.description)
        else:
            if config.exhaustive:
                arg_type = self._enum_type(data_type)
            elif data_type == dict or data_type == list:
                # NOTE: yaml.load without an explicit Loader is rejected by PyYAML >= 6 and can construct arbitrary
                # objects in older releases, so command-line input is parsed with the safe loader instead
                arg_type = yaml.safe_load
            else:
                arg_type = data_type

            if positional:
                parser.add_argument(config.name,
                                    type=arg_type,
                                    default=config.default_value,
                                    help=config.description)
            else:
                # optional args get "--" prepended to their names
                parser.add_argument("--" + base_name,
                                    dest=config.name,
                                    type=arg_type,
                                    default=config.default_value,
                                    help=config.description)

        return parser

Exemplo n.º 10

0

Exibir arquivo

Arquivo: ordered_set.py Projeto: phohenecker/rel-data

 def add_all(self, elements: typing.Iterable[T]) -> None:
     """Adds all elements in the provided ``Iterable`` to an ``OrderedSet``.

     The elements are added one by one, in iteration order, by means of ``add``.

     Args:
         elements (Iterable): The elements to add.
     """
     # the ABC aliases in ``collections`` were removed in Python 3.10, hence the ABC is taken from
     # ``collections.abc`` explicitly
     import collections.abc

     insanity.sanitize_type("elements", elements, collections.abc.Iterable)
     for e in elements:
         self.add(e)

Exemplo n.º 11

0

Exibir arquivo

Arquivo: config_spec.py Projeto: phohenecker/arg-magiq

    def add_value(self, spec: value_spec.ValueSpec) -> None:
        """Appends the specification of a configuration value to the ``ConfigSpec``.

        Args:
            spec (:class:`value_spec.ValueSpec`): The specification of the configuration value to add.
        """
        # only instances of ValueSpec are accepted
        insanity.sanitize_type("spec", spec, value_spec.ValueSpec)

        # specifications are kept in the order in which they were added
        self._config_values.append(spec)

Exemplo n.º 12

0

Exibir arquivo

    def run(
        self, path: str, facts: typing.Iterable[literal.Literal]
    ) -> typing.List[answer_set.AnswerSet]:
        """Runs DLV on the program at ``path`` together with the provided facts, and parses the printed answer sets.

        Args:
            path (str): The path of the program file that is passed to DLV.
            facts (Iterable[:class:`literal.Literal`]): The facts that are provided to DLV via stdin.

        Returns:
            list[:class:`answer_set.AnswerSet`]: One answer set for each line printed by DLV. Literals that are
                contained in ``facts`` are not reported as inferences.

        Raises:
            ValueError: If ``path`` does not refer to an existing file, or if DLV prints a literal that does not match
                ``LITERAL_PATTERN``.
            subprocess.CalledProcessError: If DLV exits with a non-zero status.
        """
        # the ABC aliases in ``collections`` were removed in Python 3.10, hence the ABC is taken from
        # ``collections.abc`` explicitly; shlex is needed for assembling the command without a shell
        import collections.abc
        import shlex

        # sanitize args
        path = str(path)
        if not os.path.isfile(path):
            raise ValueError(
                "The provided <path> does not refer to an existing file: '{}'!"
                .format(path))
        insanity.sanitize_type("facts", facts, collections.abc.Iterable)
        facts = set(facts)
        insanity.sanitize_iterable("facts",
                                   facts,
                                   elements_type=literal.Literal)
        self._sanitize_literals(facts)

        # prepare facts as single string to provide to DLV
        str_facts = ". ".join(str(f) for f in facts)
        if str_facts:
            str_facts += "."

        # run DLV
        # the facts are written to stdin directly rather than being interpolated into a shell command, so quotes or
        # shell metacharacters in facts or in the path can neither break nor hijack the command; the DLV command is
        # split like a shell would split it (without variable or tilde expansion), and the trailing line break
        # mirrors what ``echo`` used to append
        cmd = shlex.split(self._dlv_path) + ["-silent", "--", path]
        result = subprocess.check_output(cmd,
                                         input=str_facts + "\n",
                                         universal_newlines=True).strip()

        # check if any answer set has been provided at all
        if result == "":
            return []

        # split result into parts representing single answer sets (first and last character of each line are dropped)
        result = [r.strip()[1:-1] for r in result.split("\n")]

        # create answer sets
        result_sets = []
        for r in result:  # iterate over all answer sets (i.e., string representations of them)

            # collect inferences
            inferences = set()
            if r != "":
                for x in r.split(", "):
                    m = re.match(self.LITERAL_PATTERN, x)
                    if m is None:
                        # fail with a clear message instead of an AttributeError on None
                        raise ValueError("DLV printed a literal that could not be parsed: '{}'!".format(x))
                    lit = literal.Literal(m.group("predicate"),
                                          m.group("terms").split(","),
                                          positive=m.group("sign") == "")
                    if lit not in facts:
                        inferences.add(lit)

            # create answer set
            result_sets.append(answer_set.AnswerSet(facts, inferences))

        return result_sets

Exemplo n.º 13

0

Exibir arquivo

Arquivo: observable_set.py Projeto: phohenecker/rel-data

    def add(self, elem) -> None:
        """Adds the provided element to the set, and notifies all registered observers if it was not contained yet.

        Args:
            elem: The element to add, which is type-checked against the element type of the set.
        """
        # check the type of the element
        insanity.sanitize_type("elem", elem, self._element_type)

        # nothing to do if the element is contained already
        if elem in self:
            return

        # store the element, and tell the observers about it
        self._data.add(elem)
        for observer in self._observers:
            observer.element_added(elem)

Exemplo n.º 14

0

Exibir arquivo

Arquivo: training_executor.py Projeto: fmtumbuka/exp-base

    def __init__(self, conf: base_config.BaseConfig, ckpt_queue: queues.Queue):
        """Creates a new ``TrainingExecutor`` for the provided configuration.

        Args:
            conf (:class:`base_config.BaseConfig`): The configuration of the conducted experiment.
            ckpt_queue (queues.Queue): The queue that is used for sending created checkpoints to the controller of the
                experiment.
        """

        # check the types of both args before anything is stored
        insanity.sanitize_type("conf", conf, base_config.BaseConfig)
        insanity.sanitize_type("ckpt_queue", ckpt_queue, queues.Queue)

        # keep the provided args
        self._conf = conf
        self._ckpt_queue = ckpt_queue

Exemplo n.º 15

0

Exibir arquivo

    def add_config(self, config: config_value.ConfigValue):
        """Registers the provided configuration with the specification under its name.

        Args:
            config (:class:`config_value.ConfigValue`): The configuration to add.

        Raises:
            ValueError: If a config with the same name exists already.
            TypeError: If ``config`` is not of type :class:`config_value.ConfigValue`.
        """
        insanity.sanitize_type("config", config, config_value.ConfigValue)

        # names have to be unique within a specification
        name_taken = config.name in self._values
        if name_taken:
            message = "This specification contains a configuration with name '{}' already!"
            raise ValueError(message.format(config.name))

        self._values[config.name] = config

Exemplo n.º 16

0

Exibir arquivo

    def __init__(self, conf: base_config.BaseConfig,
                 ckpt: typing.Optional[str]):
        """Creates a new ``EvaluationExecutor`` for the provided configuration and checkpoint.

        Args:
            conf (:class:`base_config.BaseConfig`): The configuration of the conducted experiment.
            ckpt (str): The checkpoint to evaluate. This arg identifies a checkpoint that was just created by a training
                process, and is supposed to be evaluated (in parallel to the training) now. Notice that this is
                ``None``, if ``conf.test`` has been set to ``True`` (cf. :attr:`base_config.BaseConfig.test`), which
                means that a model is just being tested without any further training.
        """

        # check the types of both args, where the checkpoint is allowed to be None
        insanity.sanitize_type("conf", conf, base_config.BaseConfig)
        insanity.sanitize_type("ckpt", ckpt, str, none_allowed=True)

        # keep the provided args
        self._conf = conf
        self._ckpt = ckpt

Exemplo n.º 17

0

Exibir arquivo

Arquivo: class_membership.py Projeto: phohenecker/rel-data

 def __init__(self, cls: class_type.ClassType, is_member: bool, inferred: bool=False, prediction: bool=False):
     """Creates a new ``ClassMembership``, which describes the relation between an individual and a class.

     Args:
         cls (:class:`class_type.ClassType`): Specifies :attr:`class`.
         is_member (bool): Specifies :attr:`is_member`.
         inferred (bool, optional): Specifies :attr:`inferred`.
         prediction (bool, optional): Specifies :attr:`prediction`.

     Raises:
         ValueError: If both ``inferred`` and ``prediction`` are truthy.
     """
     # check the type of the class
     insanity.sanitize_type("cls", cls, class_type.ClassType)

     # the two flags are mutually exclusive
     if inferred and prediction:
         message = "A class membership cannot be an inference and a prediction at the same time!"
         raise ValueError(message)

     # store the (normalized) args
     self._cls = cls
     self._is_member = bool(is_member)
     self._inferred = bool(inferred)
     self._prediction = bool(prediction)

Exemplo n.º 18

0

Exibir arquivo

def position(index: int) -> typing.Callable[[property], property]:
    """Creates a decorator that records the position of a property of a configuration class among all parsed
    positional args.

    Notice that ``argmagic`` ignores this decorator if positional args are not used or if the annotated property
    defines an optional configuration value.

    Args:
        index (int): The index of the annotated configuration in the sequence of positional args.

    Returns:
        A decorator that stores ``index`` on the getter of the decorated property, and returns the property itself.
    """
    # the index has to be a non-negative integer
    insanity.sanitize_type("index", index, int)
    insanity.sanitize_range("index", index, minimum=0)

    def _position(func: property) -> property:
        if isinstance(func, property):
            # the index is attached to the getter function of the property
            func.fget.__dict__[argmagic.POSITION] = index
            return func
        raise TypeError(
            "The decorator @position may be applied to properties only!")

    return _position

Exemplo n.º 19

0

Exibir arquivo

def exhaustive(values: type) -> typing.Callable[[property], property]:
    """This is a decorator that allows for annotating a property of a configuration class with an ``Enum`` that
    describes the admissible values of the same.

    Notice that ``argmagic`` ignores this decorator for properties of type ``bool``.

    Args:
        values (type): An enum that specifies the admissible values of the annotated property.

    Returns:
        A decorator that stores ``values`` on the getter of the decorated property, and returns the property itself.

    Raises:
        TypeError: If ``values`` is not a subclass of ``enum.Enum``, or if the created decorator is applied to
            anything but a property.
    """
    insanity.sanitize_type("values", values, type)
    if not issubclass(values, enum.Enum):
        # report the offending class itself (``type.__name__`` would always print "type")
        raise TypeError(
            "The parameter <values> has to be an Enum, but type {} is not!".
            format(values.__name__))

    def _exhaustive(func: property) -> property:
        if not isinstance(func, property):
            raise TypeError(
                "The decorator @exhaustive may be applied to properties only!")
        # the enum is attached to the getter function of the property
        func.fget.__dict__[argmagic.CONFIG_VALUES] = values
        return func

    return _exhaustive

Exemplo n.º 20

0

Exibir arquivo

Arquivo: observable_set.py Projeto: phohenecker/rel-data

    def __init__(self,
                 element_type: typing.Type[T],
                 data: typing.Iterable[T] = None):
        """Creates a new instance of ``ObservableSet``.

        Args:
            element_type (type): The required type of the elements of the newly created set.
            data (Iterable, optional): Initial elements to add to the new set.
        """
        # the ABC aliases in ``collections`` were removed in Python 3.10, hence the ABC is taken from
        # ``collections.abc`` explicitly
        import collections.abc

        # sanitize args
        insanity.sanitize_type("element_type", element_type, type)
        insanity.sanitize_type("data",
                               data,
                               collections.abc.Iterable,
                               none_allowed=True)

        # define attributes
        self._data = set()  # the elements of the set
        self._observers = []  # the observers that are notified about added elements
        self._element_type = element_type  # the type that added elements are checked against

        # add provided data
        if data is not None:
            self.add_all(data)

Exemplo n.º 21

0

Exibir arquivo

Arquivo: magiq_parser.py Projeto: phohenecker/arg-magiq

    def __init__(self, spec: type, app_name: str, app_description: str):
        """Creates a new instance of ``MagiqParser``.

        Args:
            spec (type): A configuration class that specifies how to parse args.
            app_name (str): The name of the application whose args are being parsed. This is printed in the help text.
            app_description (str): A description of the application whose args are being parsed. This is printed in the
                help text.

        Raises:
            TypeError: If ``spec`` is not a class.
            ValueError: If the ``__init__`` of ``spec`` has parameters, other than the first one, that have to be
                provided by the caller.
        """

        # sanitize args
        insanity.sanitize_type("spec", spec, type)
        if not inspect.isclass(spec):
            raise TypeError("<spec> has to be class")

        # the constructor has to be callable without any args: the first parameter (self) is skipped, and every other
        # one has to be omittable, i.e., it either has a default value or is *args/**kwargs
        # (merely counting parameters would reject classes that do not define an __init__ at all, since
        # object.__init__ is declared as (self, /, *args, **kwargs))
        init_params = list(inspect.signature(spec.__init__).parameters.values())[1:]
        variadic = (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
        if any(p.kind not in variadic and p.default is inspect.Parameter.empty for p in init_params):
            raise ValueError("<spec> has to have a no-arg constructor")

        app_name = str(app_name)
        app_description = str(app_description)

        # store args
        self._app_description = app_description
        self._app_name = app_name
        self._spec = spec

Exemplo n.º 22

0

Exibir arquivo

Arquivo: ordered_set.py Projeto: phohenecker/rel-data

    def add(self, element: T) -> None:
        """Stores the provided element at the position given by its index, and notifies all registered observers.

        If the slot at the element's index is occupied already, then nothing happens.

        Args:
            element: The element to add, which is type-checked against the element type of the set.
        """
        # check the type of the element
        insanity.sanitize_type("element", element, self._element_type)

        # determine the slot that the element belongs to
        idx = self._index_func(element)

        if idx >= self._len:
            # the slot does not exist yet -> pad the list with Nones up to (and including) the slot
            missing = idx - self._len + 1
            self._len = idx + 1
            self._data.extend([None] * missing)
        elif self._data[idx] is not None:
            # the slot is occupied already -> nothing to do
            return

        # put the element into its slot
        self._data[idx] = element
        self._num_elements += 1

        # tell the observers about the new element
        for observer in self._observers:
            observer.element_added(element)

Exemplo n.º 23

0

Exibir arquivo

Arquivo: dummy_config_3.py Projeto: phohenecker/arg-magic

 def b(self, b: numbers.Real) -> None:
     """Validates the provided value, and stores it as a ``float``.

     Args:
         b (numbers.Real): The new value, which is type-checked against ``numbers.Real``.
     """
     insanity.sanitize_type("b", b, numbers.Real)

     # the value is normalized to a plain float
     as_float = float(b)
     self._b = as_float

Exemplo n.º 24

0

Exibir arquivo

Arquivo: config.py Projeto: phohenecker/country-data-gen

 def seed(self, seed: int) -> None:
     """Validates and stores the seed.

     Args:
         seed (int): The new value, which is type-checked against ``int``.
     """
     # only integers are accepted
     insanity.sanitize_type("seed", seed, int)

     self._seed = seed

Exemplo n.º 25

0

Exibir arquivo

Arquivo: config.py Projeto: phohenecker/country-data-gen

 def num_training_samples(self, num_training_samples: int) -> None:
     """Validates and stores the number of training samples.

     Args:
         num_training_samples (int): The new value, which is type-checked against ``int`` and range-checked with a
             minimum of 1.
     """
     arg_name = "num_training_samples"

     # check the type first, then the admissible range
     insanity.sanitize_type(arg_name, num_training_samples, int)
     insanity.sanitize_range(arg_name, num_training_samples, minimum=1)

     self._num_training_samples = num_training_samples

Exemplo n.º 26

0

Exibir arquivo

Arquivo: config.py Projeto: phohenecker/country-data-gen

 def num_datasets(self, num_datasets: int) -> None:
     """Validates and stores the number of datasets.

     Args:
         num_datasets (int): The new value, which is type-checked against ``int`` and range-checked with a minimum
             of 1.
     """
     insanity.sanitize_type("num_datasets", num_datasets, int)
     # the range check has to report the actual parameter name (it used to say "num_samples")
     insanity.sanitize_range("num_datasets", num_datasets, minimum=1)
     self._num_datasets = num_datasets

Exemplo n.º 27

0

Exibir arquivo

    def create_individual(cls,
                          name: str,
                          target_type: typing.Type[T] = None,
                          args: typing.Iterable = None,
                          kwargs: typing.Dict[str, typing.Any] = None) -> T:
        """Creates an instance of :class:`individual.Individual` with the provided name.

        Every created instance receives a unique :attr:`individual.Individual.index`, and it is checked whether the
        provided name has been used already. Notice that the ``IndividualFactory`` is sensitive to the currently
        active context.

        In order to create an instance of a subclass of :class:`individual.Individual`, the according type may be
        provided via the arg ``target_type``. The ``__init__`` of this class is supposed to accept ``index`` and
        ``name`` as the first two positional args. Additional args of the constructor may be specified via ``args``
        and ``kwargs``, which means that an instance of ``target_type`` is created as follows:

            new_instance = target_type(index, name, *args, **kwargs)

        Args:
            name (str): The name to assign to the created :class:`individual.Individual`. If the provided ``name``
                is not a ``str``, then it is converted into such.
            target_type (type, optional): The type of the instance to create, which has to be a subclass of
                :class:`individual.Individual`.
            args (iterable, optional): The positional args to pass to the constructor of ``target_type``.
            kwargs (dict, optional): The keyword args to pass to the constructor of ``target_type``.

        Returns:
            :class:`individual.Individual`: The newly constructed instance.

        Raises:
            ValueError: If :attr:`check_names` is ``True`` and the provided ``name`` has been used before, or if
                a provided ``target_type`` is not a subclass of :class:`individual.Individual`.
        """
        # validate the target type, which is optional
        insanity.sanitize_type("target_type", target_type, type, none_allowed=True)
        if target_type is not None and not issubclass(target_type, individual.Individual):
            message = "The provided <target_type> is not a subclass of reldata.individual.Individual: {}!"
            raise ValueError(message.format(target_type))

        # names are always strings
        name = str(name)

        # make sure that the context is set up, and retrieve it
        cls._prepare_context()
        ctx = dc.DataContext.get_context()

        # reject names that were used before, if configured to do so
        if cls.check_names:
            if name in ctx[cls._USED_NAMES]:
                raise ValueError("An individual with name '{}' exists already!".format(name))
            ctx[cls._USED_NAMES].add(name)

        # reserve the next index for the new individual
        ctx[cls._LAST_INDEX] += 1
        new_index = ctx[cls._LAST_INDEX]

        # without a target type, the default implementation is used
        if target_type is None:
            return _Individual(new_index, name)

        # otherwise, the target type is instantiated with the additional args (if any)
        extra_args = args if args is not None else []
        extra_kwargs = kwargs if kwargs is not None else {}
        return target_type(new_index, name, *extra_args, **extra_kwargs)

Exemplo n.º 28

0

Exibir arquivo

Arquivo: dummy_config_3.py Projeto: phohenecker/arg-magic

 def c(self, c: int) -> None:
     """Validates and stores the provided value.

     Args:
         c (int): The new value, which is type-checked against ``int``.
     """
     # only integers are accepted
     insanity.sanitize_type("c", c, int)

     self._c = c

Exemplo n.º 29

0

Exibir arquivo

    def generate_datasets(
            self,
            num_datasets: int,
            num_training_samples: int,
            output_dir: str
    ) -> None:
        """Generates datasets from the data that was provided to this instance of ``DatasetGenerator``, and writes them
        to disk.

        For each dataset, a directory named by the zero-padded index of the dataset is created in ``output_dir``, which
        contains the subdirectories ``train``, ``dev``, and ``test`` as well as the files ``countries.dev.txt`` and
        ``countries.test.txt``. Missing directories, including ``output_dir`` itself, are created as needed.

        Args:
            num_datasets (int): The total number of datasets to create.
            num_training_samples (int): The number of training samples to create for each dataset.
            output_dir (str): The path of the output directory.
        """
        # sanitize args
        insanity.sanitize_type("num_datasets", num_datasets, int)
        insanity.sanitize_range("num_datasets", num_datasets, minimum=1)
        insanity.sanitize_type("num_training_samples", num_training_samples, int)
        insanity.sanitize_range("num_training_samples", num_training_samples, minimum=1)

        # create patterns for the names of the directories that are created for the single datasets and for
        # the base names of training samples (zero-padded to the width of the largest index)
        output_dir_pattern = "{:0" + str(len(str(num_datasets - 1))) + "d}"
        sample_filename_pattern = "{:0" + str(len(str(num_training_samples - 1))) + "d}"

        for dataset_idx in range(num_datasets):

            print("generating dataset #{}...".format(dataset_idx))

            # assemble needed paths
            ds_output_dir = os.path.join(output_dir, output_dir_pattern.format(dataset_idx))
            train_dir = os.path.join(ds_output_dir, "train")
            dev_dir = os.path.join(ds_output_dir, "dev")
            test_dir = os.path.join(ds_output_dir, "test")

            # create folder structure for storing the current dataset
            # (makedirs with exist_ok avoids the check-then-create race, and creates missing parents as well)
            for directory in (train_dir, dev_dir, test_dir):
                os.makedirs(directory, exist_ok=True)

            # split countries into train/dev/test
            train, dev, test = self._split_countries()

            # write selected dev+test countries to disk
            with open(os.path.join(ds_output_dir, "countries.dev.txt"), "w") as f:
                for c in dev:
                    f.write("{}\n".format(c))
            with open(os.path.join(ds_output_dir, "countries.test.txt"), "w") as f:
                for c in test:
                    f.write("{}\n".format(c))

            # create training samples + write them to disk
            for sample_idx in range(num_training_samples):
                print("generating training sample #{}...".format(sample_idx))
                sample = self._generate_sample(train)
                kg_writer.KgWriter.write(sample, train_dir, sample_filename_pattern.format(sample_idx))

            # create evaluation sample + write it to disk
            print("generating dev sample... ")
            dev_sample = self._generate_sample(train, inf_countries=dev, minimal=True)
            kg_writer.KgWriter.write(dev_sample, dev_dir, "dev")

            # create test sample + write it to disk
            print("generating test sample...")
            test_sample = self._generate_sample(train, inf_countries=test, minimal=True)
            kg_writer.KgWriter.write(test_sample, test_dir, "test")

            # print statistics about test sample (counted without building intermediate lists)
            num_spec = sum(1 for t in test_sample.triples if not t.inferred)
            num_inf = sum(1 for t in test_sample.triples if t.inferred)
            print("number triples in test sample: {} ({} spec / {} inf)".format(num_spec + num_inf, num_spec, num_inf))

            print("OK\n")

Exemplo n.º 30

0

Exibir arquivo

 def __init__(
         self,
         data: typing.Dict[str, country.Country],
         problem_setting: str,
         solver: base_solver.BaseSolver,
         ontology_path: str,
         class_facts: bool
 ):
     """Creates a new instance of ``DataGenerator`` for creating datasets from the provided data.
     
     Args:
         data (collections.OrderedDict): The data to generate datasets from. This is supposed to map country names to
             ``Country`` objects whose neighbors are given in terms of the same names.
         problem_setting (str): The considered problem setting.
         solver (:class:`base_solver.BaseSolver`): The ASP solver to use.
         ontology_path (str): The path to the ASP program that describes the used ontology.
         class_facts (bool): Indicates whether to include class facts in generated samples.
     """
     # sanitize args
     insanity.sanitize_type("data", data, collections.OrderedDict)
     problem_setting = str(problem_setting)
     insanity.sanitize_value("problem_setting", problem_setting, [self.PROBLEM_S1, self.PROBLEM_S2, self.PROBLEM_S3])
     insanity.sanitize_type("solver", solver, base_solver.BaseSolver)
     ontology_path = str(ontology_path)
     if not os.path.isfile(ontology_path):
         raise ValueError(
                 "The provided <ontology_path> does not refer to an existing file: '{}'!".format(ontology_path)
         )
     
     # define attributes
     self._class_facts = bool(class_facts)    # indicates whether to include class facts in samples
     self._classes = {}                       # maps class names to individuals
     self._data = data                        # the provided data as dict from country names to Country objects
     self._ontology_path = ontology_path      # the ASP program that describes the ontology
     self._problem_setting = problem_setting  # the considered version of the reasoning problem
     self._regions = None                     # maps region names to lists of (names of) subregions
     self._relations = {}                     # maps relation names to individuals
     self._solver = solver                    # the used ASP solver
     
     # fix the names of countries, regions, and subregions in the data (DLV expects camel case)
     self._data = {self._fix_name(k): v for k, v in self._data.items()}
     for c in self._data.values():
         c.name = self._fix_name(c.name)
         c.region = self._fix_name(c.region)
         c.subregion = None if c.subregion is None else self._fix_name(c.subregion)
         c.neighbors = [self._fix_name(n) for n in c.neighbors]
     
     # extract regions/subregions from the data
     regions = {}
     for c in self._data.values():
         if c.region not in regions:
             regions[c.region] = set()
         if c.subregion is not None and c.subregion not in regions[c.region]:
             regions[c.region].add(c.subregion)
     
     # sort regions/subregions alphabetically
     self._regions = collections.OrderedDict(
             (r, list(x for x in sorted(s) if x is not None))
             for r, s in sorted(regions.items(), key=lambda x: x[0])
     )
     
     # prepare all reusable parts of any (subsequently) generated sample knowledge graph
     with dc.DataContext():
         
         # create all classes
         self._classes[voc.CLASS_COUNTRY] = ctf.ClassTypeFactory.create_class(voc.CLASS_COUNTRY)
         self._classes[voc.CLASS_REGION] = ctf.ClassTypeFactory.create_class(voc.CLASS_REGION)
         self._classes[voc.CLASS_SUBREGION] = ctf.ClassTypeFactory.create_class(voc.CLASS_SUBREGION)
         
         # create all relations
         self._relations[voc.RELATION_LOCATED_IN] = rtf.RelationTypeFactory.create_relation(voc.RELATION_LOCATED_IN)
         self._relations[voc.RELATION_NEIGHBOR_OF] = rtf.RelationTypeFactory.create_relation(
                 voc.RELATION_NEIGHBOR_OF
         )