Example #1
 def _graph_fn_call(self, *inputs):
     if get_backend() == "tf":
         concat_output = self.layer.call(force_list(inputs))
         # Add batch/time-rank information.
         concat_output._batch_rank = 0 if self.time_major is False else 1
         if self.in_space_0.has_time_rank:
             concat_output._time_rank = 0 if self.time_major is True else 1
         return concat_output
     elif get_backend() == "pytorch":
         return nn.Sequential(force_list(inputs))
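
All of the `_graph_fn_...` snippets in this listing share one pattern: branch on `get_backend()` and coerce the variadic `*inputs` with `force_list` before handing them to a backend-specific op. Below is a minimal, self-contained sketch of that pattern; `get_backend`, `force_list`, and the numpy payload are stand-ins for illustration, not RLgraph's actual implementations.

    import numpy as np

    _BACKEND = "numpy"  # stand-in for RLgraph's configured backend

    def get_backend():
        return _BACKEND

    def force_list(elements):
        # Wrap a single element into a list; convert tuples to lists.
        return list(elements) if isinstance(elements, (list, tuple)) else [elements]

    def graph_fn_call(*inputs, axis=-1):
        # Branch on the active backend, mirroring the structure of _graph_fn_call above.
        if get_backend() == "numpy":
            return np.concatenate(force_list(inputs), axis=axis)
        raise NotImplementedError(get_backend())

    print(graph_fn_call(np.ones((2, 3)), np.zeros((2, 2))).shape)  # (2, 5)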
Example #2
    def get_action(self,
                   states,
                   internals=None,
                   use_exploration=True,
                   apply_preprocessing=True,
                   extra_returns=None):
        # TODO: common pattern - move to Agent
        """
        Args:
            extra_returns (Optional[Union[Set[str],str]]): Optional string or set of strings for additional return
                values (besides the actions). Possible values are:
                - 'preprocessed_states': The preprocessed states after passing the given states through the
                preprocessor stack.
                - 'internal_states': The internal states returned by the RNNs in the NN pipeline.
                - 'used_exploration': Whether epsilon- or noise-based exploration was used or not.

        Returns:
            tuple or single value depending on `extra_returns`:
                - action
                - the preprocessed states
        """
        extra_returns = {extra_returns} if isinstance(
            extra_returns, str) else (extra_returns or set())
        # States come in without preprocessing -> use state space.
        if apply_preprocessing:
            call_method = self.root_component.get_preprocessed_state_and_action
            batched_states = self.state_space.force_batch(states)
        else:
            call_method = self.root_component.action_from_preprocessed_state
            batched_states = states
        remove_batch_rank = batched_states.ndim == np.asarray(states).ndim + 1

        # Increase timesteps by the batch size (number of states in batch).
        batch_size = len(batched_states)
        self.timesteps += batch_size

        # Control which return values to "pull" (depending on `extra_returns`).
        return_ops = [0, 1] if "preprocessed_states" in extra_returns else [0]
        ret = force_list(
            self.graph_executor.execute((
                call_method,
                [batched_states,
                 not use_exploration],  # deterministic = not use_exploration
                # 0=preprocessed_states, 1=action
                return_ops)))
        # Convert Gumbel (relaxed one-hot) sample back into int type for all discrete composite actions.
        if isinstance(self.action_space, ContainerSpace):
            ret[0] = ret[0].map(mapping=lambda key, action: np.argmax(
                action, axis=-1).astype(action.dtype) if isinstance(
                    self.flat_action_space[key], IntBox) else action)
        elif isinstance(self.action_space, IntBox):
            ret[0] = np.argmax(ret[0], axis=-1).astype(self.action_space.dtype)

        if remove_batch_rank:
            ret[0] = strip_list(ret[0])

        if "preprocessed_states" in extra_returns:
            return ret[0], ret[1]
        else:
            return ret[0]
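
The first statement of `get_action` normalizes `extra_returns` so that a plain string, a set, or `None` all end up as a set that can be tested with `in`. A standalone sketch of that idiom (the helper name is made up for illustration):

    def normalize_extra_returns(extra_returns=None):
        # str -> one-element set; None -> empty set; an existing set/list is kept as given.
        return {extra_returns} if isinstance(extra_returns, str) else (extra_returns or set())

    print(normalize_extra_returns("preprocessed_states"))  # {'preprocessed_states'}
    print(normalize_extra_returns(None))                   # set()
    print(normalize_extra_returns({"used_exploration"}))   # {'used_exploration'}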
Example #3
        def method(self_, *inputs, **kwargs):
            args_ = inputs
            kwargs_ = kwargs
            for i, sub_component in enumerate(self_.sub_components.values()):  # type: Component
                # TODO: python-Components: For now, we call each preprocessor's graph_fn
                #  directly (assuming that inputs are not ContainerSpaces).
                if self_.backend == "python" or get_backend() == "python":
                    graph_fn = getattr(sub_component, "_graph_fn_" + component_api_method_name)
                    # if sub_component.api_methods[components_api_method_name].add_auto_key_as_first_param:
                    #    results = graph_fn("", *args_)  # TODO: kwargs??
                    # else:
                    results = graph_fn(*args_)
                elif get_backend() == "pytorch":
                    # Do NOT convert to tuple; it has to be unpacked again immediately.
                    results = getattr(sub_component, component_api_method_name)(*force_list(args_))
                else:  # if get_backend() == "tf":
                    results = getattr(sub_component, component_api_method_name)(*args_, **kwargs_)

                # Recycle args_, kwargs_ for reuse in next sub-Component's API-method call.
                if isinstance(results, dict):
                    args_ = ()
                    kwargs_ = results
                else:
                    args_ = force_tuple(results)
                    kwargs_ = {}

            if args_ == ():
                return kwargs_
            elif len(args_) == 1:
                return args_[0]
            else:
                return args_
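
The loop above threads each sub-component's output into the next call: a dict result becomes the next call's `**kwargs`, anything else is forced into the next `*args` tuple. A pure-Python sketch of that chaining, with plain callables standing in for Components and a stand-in `force_tuple`:

    def force_tuple(elements):
        # Mirror of force_list, but returning a tuple.
        return tuple(elements) if isinstance(elements, (list, tuple)) else (elements,)

    def chain(stages, *inputs, **kwargs):
        args_, kwargs_ = inputs, kwargs
        for stage in stages:
            results = stage(*args_, **kwargs_)
            # Recycle the results for the next stage's call.
            if isinstance(results, dict):
                args_, kwargs_ = (), results
            else:
                args_, kwargs_ = force_tuple(results), {}
        if args_ == ():
            return kwargs_
        return args_[0] if len(args_) == 1 else args_

    # Usage: the first toy stage returns a dict, the second consumes it as kwargs.
    stages = [lambda x: dict(x=x * 2), lambda x=0: x + 1]
    print(chain(stages, 5))  # 11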
Example #4
 def _graph_fn_apply(self, *inputs):
     if get_backend() == "tf":
         concat_output = tf.concat(values=inputs, axis=self.axis)
         # Add batch/time-rank information.
         concat_output._batch_rank = 0 if self.time_major is False else 1
         if self.in_space_0.has_time_rank:
             concat_output._time_rank = 0 if self.time_major is True else 1
         return concat_output
     elif get_backend() == "pytorch":
         return torch.cat(force_list(inputs))
Example #5
    def __init__(self,
                 level_id,
                 observations="RGB_INTERLEAVED",
                 actions=None,
                 frameskip=4,
                 config=None,
                 renderer="software",
                 seed=None,
                 level_cache=None):
        """
        Args:
            level_id (str): Specifier of the level to play, e.g. 'seekavoid_arena_01'.
            observations (Union[str,List[str]]): String specifier(s) for the observation(s) to be used with the
                given level. Will be converted into either a (single) BoxSpace or a Tuple (of BoxSpaces).
                See deepmind's documentation for all available observations.
            actions (Optional[List[dict]]): The RLgraph action spec (currently, only IntBox (shape=()) RLgraph action
                spaces are supported) that will be translated from and to the deepmind Lab actions.
                List slots correspond to the single int-actions, list items are dicts with:
                key=deepmind Lab partial action name e.g. LOOK_LEFT_RIGHT_PIXELS_PER_FRAME.
                value=the value for that deepmind Lab partial action e.g. -100.
            frameskip (Optional[Union[Tuple[int,int],int]]): How many frames should be skipped (with the action
                repeated and the reward accumulated). E.g. (2,5) -> uniformly pull from the set [2,3,4].
                Default: 4.
            config (Optional[dict]): The `config` parameter to be passed into the Lab's constructor.
                Supports 'width', 'height', 'fps', and other useful parameters.
                Values must be given as string values. e.g. dict(width='96')
            renderer (str): The `renderer` parameter to be passed into the Lab's constructor.
            seed (Optional[int]): An optional seed to use to initialize a numpy random state object, which is then used
                to seed all occurring resets in a deterministic fashion.
            level_cache (Optional[object]): An optional custom level caching object to help increase performance
                when playing many repeating levels. Will be passed as is into the Lab's constructor.
        """
        # Create the wrapped deepmind lab level object.
        self.level_id = level_id
        observations = force_list(observations)
        config = default_dict(config, dict(width='96', height='72',
                                           fps='60'))  # Default config.
        self.level = deepmind_lab.Lab(self.level_id,
                                      observations,
                                      config=config,
                                      renderer=renderer,
                                      level_cache=level_cache)

        # Dict mapping a discrete action (int) - we don't support continuous actions yet - into a
        # deepmind Lab action vector.
        self.action_list, action_space = self.define_actions(actions)
        observation_space = self.define_observations(observations)
        super(DeepmindLabEnv, self).__init__(observation_space, action_space)

        self.frameskip = frameskip
        self.random_state = np.random.RandomState(
            seed=seed or int(time.time()))
        self.reset()
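
Two details of this constructor are easy to illustrate in isolation: `default_dict`, which fills in default config keys without overwriting caller-supplied values, and the frameskip spec from the docstring, which may be a fixed int or a (low, high) range sampled per step. A rough sketch under those assumptions (the helper implementations here are illustrative, not the library's):

    import time
    import numpy as np

    def default_dict(target, defaults):
        # Fill in missing keys from `defaults`; caller-supplied values win.
        target = dict(target or {})
        for key, value in defaults.items():
            target.setdefault(key, value)
        return target

    def sample_frameskip(frameskip, rng):
        # Fixed int -> use as-is; (low, high) tuple -> uniform draw from [low, high).
        if isinstance(frameskip, (tuple, list)):
            return int(rng.randint(frameskip[0], frameskip[1]))
        return frameskip

    config = default_dict(dict(width='160'), dict(width='96', height='72', fps='60'))
    print(config)  # {'width': '160', 'height': '72', 'fps': '60'}

    rng = np.random.RandomState(seed=int(time.time()))
    print(sample_frameskip((2, 5), rng))  # one of 2, 3, 4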
Example #6
    def _graph_fn_call(self, *inputs):
        # Simple translation from dict to tuple-input.
        if self.dict_keys is not None:
            inputs = [inputs[0][key] for key in self.dict_keys]

        if get_backend() == "tf":
            concat_output = tf.concat(values=inputs, axis=self.axis)
            # Add batch/time-rank information.
            concat_output._batch_rank = 0 if self.time_major is False else 1
            if self.in_space_0.has_time_rank:
                concat_output._time_rank = 0 if self.time_major is True else 1
            return concat_output
        elif get_backend() == "pytorch":
            return torch.cat(force_list(inputs))
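
Compared to Example #4, the only addition is the first block: when the input is a dict, its values are pulled out in a fixed key order before concatenation so the output layout stays deterministic. A small numpy sketch of that dict-to-tuple translation (the function name is invented for the example):

    import numpy as np

    def concat_dict(inputs, dict_keys, axis=-1):
        # Pull the values out in a fixed key order, then concatenate along `axis`.
        ordered = [inputs[key] for key in dict_keys]
        return np.concatenate(ordered, axis=axis)

    obs = {"camera": np.ones((2, 4)), "proprio": np.zeros((2, 3))}
    print(concat_dict(obs, dict_keys=("camera", "proprio")).shape)  # (2, 7)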
Example #7
 def _graph_fn_calculate_gradients(self, variables, loss):
     """
     Args:
         variables (DataOpTuple): A list of variables to calculate gradients for.
         loss (SingleDataOp): The total loss over a batch to be minimized.
     """
     if get_backend() == "tf":
         var_list = list(variables.values()) if isinstance(variables, dict) else force_list(variables)
         grads_and_vars = self.optimizer.compute_gradients(
             loss=loss,
             var_list=var_list
         )
         if self.clip_grad_norm is not None:
             for i, (grad, var) in enumerate(grads_and_vars):
                 if grad is not None:
                     grads_and_vars[i] = (tf.clip_by_norm(t=grad, clip_norm=self.clip_grad_norm), var)
         return DataOpTuple(grads_and_vars)
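
`tf.clip_by_norm` rescales any gradient whose L2 norm exceeds `clip_grad_norm` and leaves smaller gradients untouched. A numpy sketch of that per-gradient clipping over a list of (grad, var) pairs, assuming plain arrays instead of TF ops:

    import numpy as np

    def clip_grads_by_norm(grads_and_vars, clip_norm):
        clipped = []
        for grad, var in grads_and_vars:
            if grad is not None:
                norm = np.linalg.norm(grad)
                if norm > clip_norm:
                    # Rescale so the gradient's L2 norm equals clip_norm.
                    grad = grad * (clip_norm / norm)
            clipped.append((grad, var))
        return clipped

    gv = [(np.array([3.0, 4.0]), "w"), (None, "b")]
    print(clip_grads_by_norm(gv, clip_norm=1.0))  # grad for 'w' rescaled to [0.6, 0.8]; None passed through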
Example #8
    def _graph_fn_apply(self, preprocessing_inputs):
        """
        Sequences (stitches) together the incoming inputs by using our buffer (with stored older records).
        Sequencing happens within the last rank if `self.add_rank` is False, otherwise a new rank is added at the end
        for the sequencing.

        Args:
            preprocessing_inputs (FlattenedDataOp): The FlattenedDataOp to be sequenced.
                One sequence is generated separately for each SingleDataOp in api_methods.

        Returns:
            FlattenedDataOp: The FlattenedDataOp holding the sequenced SingleDataOps as values.
        """
        # A normal (index != -1) assign op.
        if self.backend == "python" or get_backend() == "python":
            if self.index == -1:
                for _ in range_(self.sequence_length):
                    self.deque.append(preprocessing_inputs)
            else:
                self.deque.append(preprocessing_inputs)
            self.index = (self.index + 1) % self.sequence_length

            if self.add_rank:
                sequence = np.stack(self.deque, axis=-1)
            # Concat the sequence items in the last rank.
            else:
                sequence = np.concatenate(self.deque, axis=-1)

            # TODO move into transpose component.
            if self.in_data_format == "channels_last" and self.out_data_format == "channels_first":
                sequence = sequence.transpose((0, 3, 2, 1))

            return sequence
        elif get_backend() == "pytorch":
            if self.index == -1:
                for _ in range_(self.sequence_length):
                    if isinstance(preprocessing_inputs, dict):
                        for key, value in preprocessing_inputs.items():
                            self.deque.append(value)
                    else:
                        self.deque.append(preprocessing_inputs)
            else:
                if isinstance(preprocessing_inputs, dict):
                    for key, value in preprocessing_inputs.items():
                        self.deque.append(value)
                        self.index = (self.index + 1) % self.sequence_length
                else:
                    self.deque.append(preprocessing_inputs)
                    self.index = (self.index + 1) % self.sequence_length

            if self.add_rank:
                # torch.stack needs a sequence of tensors; convert any non-tensor items first.
                sequence = torch.stack([t if isinstance(t, torch.Tensor) else torch.tensor(t)
                                        for t in self.deque], dim=-1)
            # Concat the sequence items in the last rank.
            else:
                data = []
                for t in self.deque:
                    if isinstance(t, torch.Tensor):
                        data.append(t)
                    else:
                        data.append(torch.tensor(t))
                sequence = torch.cat(data, dim=-1)

            # TODO remove when transpose component implemented.
            if self.in_data_format == "channels_last" and self.out_data_format == "channels_first":
                # Problem: PyTorch does not have data format options in conv layers ->
                # only channels first supported.
                # -> Confusingly have to transpose.
                # B W H C -> B C W H
                # e.g. atari: [4 84 84 4] -> [4 4 84 84]
                sequence = sequence.permute(0, 3, 2, 1)

            return sequence
        elif get_backend() == "tf":
            # Assigns the input_ into the buffer at the current time index.
            def normal_assign():
                assigns = list()
                for key_, value in preprocessing_inputs.items():
                    assign_op = self.assign_variable(ref=self.buffer[key_][self.index], value=value)
                    assigns.append(assign_op)
                return assigns

            # After a reset (time index is -1), fill the entire buffer with `self.sequence_length` x input_.
            def after_reset_assign():
                assigns = list()
                for key_, value in preprocessing_inputs.items():
                    multiples = (self.sequence_length,) + tuple([1] * get_rank(value))
                    input_ = tf.expand_dims(input=value, axis=0)
                    assign_op = self.assign_variable(
                        ref=self.buffer[key_], value=tf.tile(input=input_, multiples=multiples)
                    )
                    assigns.append(assign_op)
                return assigns

            # Insert the input at the correct index or fill empty buffer entirely with input.
            insert_inputs = tf.cond(pred=(self.index >= 0), true_fn=normal_assign, false_fn=after_reset_assign)

            # Make sure the input has been inserted.
            with tf.control_dependencies(control_inputs=force_list(insert_inputs)):
                # Then increase index by 1.
                index_plus_1 = self.assign_variable(ref=self.index, value=((self.index + 1) % self.sequence_length))

            # Then gather the output.
            with tf.control_dependencies(control_inputs=[index_plus_1]):
                sequences = FlattenedDataOp()
                # Collect the correct previous inputs from the buffer to form the output sequence.
                for key in preprocessing_inputs.keys():
                    n_in = [self.buffer[key][(self.index + n) % self.sequence_length]
                            for n in range_(self.sequence_length)]

                    # Add the sequence-rank to the end of our inputs.
                    if self.add_rank:
                        sequence = tf.stack(values=n_in, axis=-1)
                    # Concat the sequence items in the last rank.
                    else:
                        sequence = tf.concat(values=n_in, axis=-1)

                    # Must pass the sequence through a placeholder_with_default dummy to set back the
                    # batch rank to '?', instead of 1 (1 would confuse the auto Space inference).
                    sequences[key] = tf.placeholder_with_default(
                        sequence, shape=(None,) + tuple(get_shape(sequence)[1:])
                    )
                # TODO: implement transpose.
                return sequences
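
The python-backend branch is easiest to follow in a stripped-down form: a `deque` with `maxlen=sequence_length` acts as the ring buffer, a reset fills it with copies of the first input, and each output is the buffer stacked (or concatenated) along the last rank. A rough sketch under those assumptions; the class name and details are illustrative only:

    from collections import deque
    import numpy as np

    class SequenceSketch(object):
        def __init__(self, sequence_length=4, add_rank=False):
            self.sequence_length = sequence_length
            self.add_rank = add_rank
            self.buffer = deque(maxlen=sequence_length)
            self.index = -1  # -1 signals "just reset"

        def apply(self, frame):
            if self.index == -1:
                # After a reset, fill the whole buffer with the first frame.
                for _ in range(self.sequence_length):
                    self.buffer.append(frame)
            else:
                self.buffer.append(frame)
            self.index = (self.index + 1) % self.sequence_length
            # Stack into a new last rank, or concatenate within the last rank.
            return np.stack(self.buffer, axis=-1) if self.add_rank else np.concatenate(self.buffer, axis=-1)

    seq = SequenceSketch(sequence_length=4, add_rank=True)
    print(seq.apply(np.zeros((84, 84))).shape)  # (84, 84, 4)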
Example #9
    def init_gpus(self):
        """
        Parses GPU specs and initializes GPU devices by adjusting visible CUDA devices to
        environment and setting memory allocation options.
        """
        gpu_spec = self.execution_spec.get("gpu_spec", None)

        if gpu_spec is not None:
            self.gpus_enabled = gpu_spec.get("gpus_enabled", False)
            self.max_usable_gpus = gpu_spec.get("max_usable_gpus", 0)

            if self.gpus_enabled:
                assert self.max_usable_gpus > 0, "ERROR: GPUs are enabled, but max_usable_gpus is not > 0 (is {}).".\
                    format(self.max_usable_gpus)
                gpu_names = sorted([x.name for x in self.local_device_protos if x.device_type == 'GPU'])

                # Set fake_gpus to True iff `fake_gpus_if_necessary` is True AND we really don't have any GPUs.
                self.fake_gpus = gpu_spec.get("fake_gpus_if_necessary", False) and len(gpu_names) == 0

                if self.fake_gpus is False:
                    cuda_visible_devices = gpu_spec.get("cuda_visible_devices", None)
                    if len(gpu_names) < self.max_usable_gpus:
                        self.logger.warn("WARNING: max_usable_gpus is {} but only {} gpus are locally visible. "
                                         "Using all available GPUs.".format(self.max_usable_gpus, len(gpu_names)))

                    # Indicate specific CUDA devices to be used.
                    if cuda_visible_devices is not None:
                        if not isinstance(cuda_visible_devices, (str, int, list)):
                            raise ValueError(
                                "ERROR: 'cuda_visible_devices' must be int/string or list of device index-values, e.g. "
                                "[0,2] or '0' or 1, but is: {}".format(type(cuda_visible_devices))
                            )
                        cuda_visible_devices = force_list(cuda_visible_devices)
                        num_provided_cuda_devices = len(cuda_visible_devices)
                        use_names = [gpu_names[int(device_id)] for device_id in cuda_visible_devices]
                        cuda_visible_devices = ",".join(cuda_visible_devices)

                        # Must match number of allowed GPUs.
                        assert self.max_usable_gpus == num_provided_cuda_devices, \
                            "ERROR: Provided {} CUDA devices ({}), but max_usable_gpus is {}. Must match!".format(
                                num_provided_cuda_devices, cuda_visible_devices, self.max_usable_gpus)

                        # Expose these devices.
                        self.logger.info("GPU strategy: Exposing CUDA devices with ids: {}".format(cuda_visible_devices))
                        os.environ["CUDA_VISIBLE_DEVICES"] = cuda_visible_devices
                        self.gpu_names = use_names

                    # Assign as many as specified.
                    else:
                        cuda_visible_devices = []
                        use_names = []
                        for i, name in enumerate(gpu_names):
                            if len(use_names) < self.max_usable_gpus:
                                use_names.append(name)
                                cuda_visible_devices.append(str(i))
                        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(cuda_visible_devices)
                        self.logger.info("GPU strategy initialized with GPUs enabled: {}".format(use_names))
                        self.gpu_names = use_names

                    self.num_gpus = len(self.gpu_names)
                    self.available_devices.extend(self.gpu_names)
                    per_process_gpu_memory_fraction = gpu_spec.get("per_process_gpu_memory_fraction", None)
                    if per_process_gpu_memory_fraction is not None:
                        self.tf_session_config.gpu_options.per_process_gpu_memory_fraction = per_process_gpu_memory_fraction

                    self.tf_session_config.gpu_options.allow_growth = gpu_spec.get("allow_memory_growth", False)
        else:
            # Do not allow any GPUs to be used.
            self.gpus_enabled = False
            self.logger.info("gpu_spec is None, disabling GPUs.")
Example #10
    def execute(self, *api_method_calls):
        # Have to call each method separately.
        ret = []
        for api_method in api_method_calls:
            if api_method is None:
                continue
            elif isinstance(api_method, (list, tuple)):
                # Which ops are supposed to be returned?
                op_indices_to_return = api_method[2] if len(
                    api_method) > 2 else None
                params = util.force_list(api_method[1])
                api_method = api_method[0]

                # TODO where to determine this? exec spec?
                requires_grad = False
                if "update" in api_method:
                    requires_grad = True
                tensor_params = force_torch_tensors(
                    params=params, requires_grad=requires_grad)
                api_ret = self.graph_builder.execute_define_by_run_op(
                    api_method, tensor_params)
                if not isinstance(api_ret, (list, tuple)):
                    api_ret = [api_ret]
                to_return = []
                if op_indices_to_return is not None:
                    # Build return ops in correct order.
                    # TODO clarify op indices order vs tensorflow.
                    for i in sorted(op_indices_to_return):
                        op_result = api_ret[i]
                        if isinstance(op_result, torch.Tensor
                                      ) and op_result.requires_grad is True:
                            op_result = op_result.detach()
                        to_return.append(op_result)

                else:
                    # Just return everything in the order it was returned by the API method.
                    if api_ret is not None:
                        for op_result in api_ret:
                            if isinstance(
                                    op_result, torch.Tensor
                            ) and op_result.requires_grad is True:
                                op_result = op_result.detach()
                            to_return.append(op_result)

                # Clean and return.
                self.clean_results(ret, to_return)
            else:
                # API method is a plain string without args.
                to_return = []
                api_ret = self.graph_builder.execute_define_by_run_op(
                    api_method)
                if api_ret is None:
                    continue
                if not isinstance(api_ret, (list, tuple)):
                    api_ret = [api_ret]
                for op_result in api_ret:
                    if isinstance(
                            op_result,
                            torch.Tensor) and op_result.requires_grad is True:
                        op_result = op_result.detach()
                    to_return.append(op_result)

                # Clean and return.
                self.clean_results(ret, to_return)

        # Unwrap if len 1.
        ret = ret[0] if len(ret) == 1 else ret
        return ret
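
The `op_indices_to_return` handling is an index-based selection over whatever the API method returned (with `requires_grad` tensors detached before being handed back). A plain-Python sketch of the selection logic, with the detach step omitted since no torch tensors are involved:

    def select_return_ops(api_ret, op_indices_to_return=None):
        # Normalize a single return value into a list.
        if not isinstance(api_ret, (list, tuple)):
            api_ret = [api_ret]
        if op_indices_to_return is None:
            return list(api_ret)
        # Build the requested ops in sorted index order.
        return [api_ret[i] for i in sorted(op_indices_to_return)]

    api_ret = ("preprocessed_states", "action", "q_values")
    print(select_return_ops(api_ret, {1, 0}))  # ['preprocessed_states', 'action']
    print(select_return_ops(api_ret))          # all three, original order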
Example #11
        def method(self_, *inputs, **kwargs):
            # Fold time rank? For now only support 1st arg folding/unfolding.
            original_input = inputs[0]
            if fold_time_rank is True:
                args_ = tuple([self.folder.apply(original_input)] +
                              list(inputs[1:]))
            else:
                # TODO: If only unfolding: Assume for now that 2nd input is the original one (so we can infer
                # TODO: batch/time dims).
                if unfold_time_rank is True:
                    assert len(inputs) >= 2, \
                        "ERROR: In Stack: If unfolding w/o folding, second arg must be the original input!"
                    original_input = inputs[1]
                    args_ = tuple([inputs[0]] + list(inputs[2:]))
                else:
                    args_ = inputs
            kwargs_ = kwargs

            for i, sub_component in enumerate(
                    self_.sub_components.values()):  # type: Component
                if sub_component.scope in [
                        "time-rank-folder_", "time-rank-unfolder_"
                ]:
                    continue
                # TODO: python-Components: For now, we call each preprocessor's graph_fn
                #  directly (assuming that inputs are not ContainerSpaces).
                if self_.backend == "python" or get_backend() == "python":
                    graph_fn = getattr(
                        sub_component,
                        "_graph_fn_" + sub_components_api_method_name)
                    # if sub_component.api_methods[components_api_method_name].add_auto_key_as_first_param:
                    #    results = graph_fn("", *args_)  # TODO: kwargs??
                    # else:
                    results = graph_fn(*args_)
                elif get_backend() == "pytorch":
                    # Do NOT convert to tuple; it has to be unpacked again immediately.
                    results = getattr(
                        sub_component,
                        sub_components_api_method_name)(*force_list(args_))
                else:  # if get_backend() == "tf":
                    results = getattr(sub_component,
                                      sub_components_api_method_name)(
                                          *args_, **kwargs_)

                # Recycle args_, kwargs_ for reuse in next sub-Component's API-method call.
                if isinstance(results, dict):
                    args_ = ()
                    kwargs_ = results
                else:
                    args_ = force_tuple(results)
                    kwargs_ = {}

            if args_ == ():
                # Unfold time rank? For now only support 1st arg folding/unfolding.
                if unfold_time_rank is True:
                    assert len(kwargs_) == 1,\
                        "ERROR: time-rank-unfolding not supported for more than one NN-return value!"
                    key = next(iter(kwargs_))
                    kwargs_ = {
                        key: self.unfolder.apply(kwargs_[key], original_input)
                    }
                return kwargs_
            else:
                # Unfold time rank? For now only support 1st arg folding/unfolding.
                if unfold_time_rank is True:
                    assert len(args_) == 1,\
                        "ERROR: time-rank-unfolding not supported for more than one NN-return value!"
                    args_ = tuple(
                        [self.unfolder.apply(args_[0], original_input)] +
                        list(args_[1 if fold_time_rank is True else 2:]))
                if len(args_) == 1:
                    return args_[0]
                else:
                    return args_
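
Folding merges the batch and time ranks into a single rank before the wrapped sub-components run; unfolding restores them afterwards by reading the batch/time sizes off the original, pre-fold input. A numpy sketch of that reshape round trip (function names are illustrative; RLgraph does this via dedicated folder/unfolder components):

    import numpy as np

    def fold_time_rank(x):
        # (batch, time, ...) -> (batch * time, ...)
        return x.reshape((-1,) + x.shape[2:])

    def unfold_time_rank(x, original_input):
        # Recover (batch, time) from the original, pre-fold input.
        batch, time = original_input.shape[0], original_input.shape[1]
        return x.reshape((batch, time) + x.shape[1:])

    original = np.zeros((8, 5, 16))          # batch=8, time=5, features=16
    folded = fold_time_rank(original)        # (40, 16)
    processed = folded @ np.zeros((16, 4))   # some per-step computation -> (40, 4)
    print(unfold_time_rank(processed, original).shape)  # (8, 5, 4)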
Example #12
        def call(*args):
            if isinstance(self.output_spaces, dict):
                assert method_name in self.output_spaces, "ERROR: Method '{}' not specified in output_spaces: {}!".\
                    format(method_name, self.output_spaces)
                specs = self.output_spaces[method_name]
            else:
                specs = self.output_spaces(method_name)

            if specs is None:
                raise RLGraphError(
                    "No Space information received for method '{}:{}'".format(
                        self.specifiable_class.__name__, method_name))

            dtypes = []
            shapes = []
            return_slots = []
            for i, space in enumerate(force_list(specs)):
                assert not isinstance(space, ContainerSpace)
                # Expecting an op (space 0).
                if space == 0:
                    dtypes.append(0)
                    shapes.append(0)
                    return_slots.append(i)
                # Expecting a tensor.
                elif space is not None:
                    dtypes.append(convert_dtype(space.dtype))
                    shapes.append(space.shape)
                    return_slots.append(i)

            if get_backend() == "tf":
                # This function will send the method-call command via the out-pipe to the remote (server) Specifiable
                # object - all in-graph - and return the results to be used further by other graph ops.
                def py_call(*call_args):
                    call_args = [
                        arg.decode('UTF-8') if isinstance(arg, bytes) else arg
                        for arg in call_args
                    ]
                    try:
                        self.out_pipe.send(call_args)
                        received_results = self.out_pipe.recv()

                        # If an error occurred, it'll be passed back through the pipe.
                        if isinstance(received_results, Exception):
                            raise received_results
                        elif received_results is not None:
                            return received_results

                    except Exception as e:
                        if isinstance(e, IOError):
                            raise StopIteration()  # Clean exit.
                        else:
                            print("ERROR: Sent={} Exception={}".format(
                                call_args, e))
                            raise

                results = tf.py_func(py_call, (method_name, ) + tuple(args),
                                     dtypes,
                                     name=method_name)

                # Force known shapes on the returned tensors.
                for i, (result, shape) in enumerate(zip(results, shapes)):
                    # Not an op (which have shape=0).
                    if shape != 0:
                        result.set_shape(shape)
            else:
                raise NotImplementedError

            return results[0] if len(dtypes) == 1 else tuple(results)
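
Underneath the `tf.py_func`, the client side just sends the method name plus arguments down a pipe and blocks on the reply, while the server side answers with either results or an exception object that gets re-raised on the client. A minimal sketch of that request/reply protocol using `multiprocessing.Pipe` within a single process (no TensorFlow involved; the "step" method is invented):

    from multiprocessing import Pipe

    client, server = Pipe()

    def handle_one_request():
        # Server side: receive (method_name, *args), answer with a result or an Exception.
        method_name, *args = server.recv()
        try:
            if method_name == "step":
                server.send(("next_state", 1.0, False))
            else:
                raise ValueError("unknown method: {}".format(method_name))
        except Exception as e:
            server.send(e)

    # Client side, mirroring py_call above.
    client.send(("step", 3))
    handle_one_request()
    received = client.recv()
    if isinstance(received, Exception):
        raise received
    print(received)  # ('next_state', 1.0, False)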
Example #13
    def from_spec(cls, spec=None, **kwargs):
        """
        Uses the given spec to create an object.
        If `spec` is a dict, an optional "type" key can be used as a "constructor hint" to specify a certain class
        of the object.
        If `spec` is not a dict, `spec`'s value is used directly as the "constructor hint".

        The rest of `spec` (if it's a dict) will be used as kwargs for the (to-be-determined) constructor.
        Additional keys in **kwargs will always have precedence (overwrite keys in `spec` (if a dict)).
        Also, if the spec-dict or **kwargs contains the special key "_args", it will be popped from the dict
        and used as *args list to be passed separately to the constructor.

        The following constructor hints are valid:
        - None: Use `cls` as constructor.
        - An already instantiated object: Will be returned as is; no constructor call.
        - A string or an object that is a key in `cls`'s `__lookup_classes__` dict: The value in `__lookup_classes__`
            for that key will be used as the constructor.
        - A python callable: Use that as constructor.
        - A string: Either a json filename or the name of a python module+class (e.g. "rlgraph.components.Component"),
            which will be imported and used as the constructor.

        Args:
            spec (Optional[dict]): The specification dict.

        Keyword Args:
            kwargs (any): Optional possibility to pass the c'tor arguments in here and use spec as the type-only info.
                Then we can call this like: from_spec([type]?, [**kwargs for ctor])
                If `spec` is already a dict, then `kwargs` will be merged with spec (overwriting keys in `spec`) after
                "type" has been popped out of `spec`.
                If a constructor of a Specifiable needs an *args list of items, the special key `_args` can be passed
                inside `kwargs` with a list type value (e.g. kwargs={"_args": [arg1, arg2, arg3]}).

        Returns:
            The object generated from the spec.
        """
        # specifiable_type is already a created object of this class -> Take it as is.
        if isinstance(spec, cls):
            return spec

        # `specifiable_type`: Indicator for the Specifiable's constructor.
        # `ctor_args`: *args arguments for the constructor.
        # `ctor_kwargs`: **kwargs arguments for the constructor.
        # Copy so caller can reuse safely.
        spec = deepcopy(spec)
        if isinstance(spec, dict):
            if "type" in spec:
                specifiable_type = spec.pop("type", None)
            else:
                specifiable_type = None
            ctor_kwargs = spec
            ctor_kwargs.update(kwargs)  # give kwargs priority
        else:
            specifiable_type = spec
            ctor_kwargs = kwargs
        # Special `_args` field in kwargs for *args-utilizing constructors.
        ctor_args = force_list(ctor_kwargs.pop("_args", []))

        # Figure out the actual constructor (class) from `type_`.
        # None: Try `__default_constructor__` (if no args/kwargs), only then the constructor of cls (using args/kwargs).
        if specifiable_type is None:
            # We have a default constructor that was defined directly by cls (not by its children).
            if cls.__default_constructor__ is not None and ctor_args == [] and \
                    (not hasattr(cls.__bases__[0], "__default_constructor__") or
                     cls.__bases__[0].__default_constructor__ is None or
                     cls.__bases__[0].__default_constructor__ is not cls.__default_constructor__
                    ):
                constructor = cls.__default_constructor__
                # Default partial's keywords into ctor_kwargs.
                if isinstance(constructor, partial):
                    kwargs = default_dict(ctor_kwargs, constructor.keywords)
                    constructor = partial(constructor.func, **kwargs)
                    ctor_kwargs = {}  # erase to avoid duplicate kwarg error
            # Try our luck with this class itself.
            else:
                constructor = cls
        # Try the __lookup_classes__ of this class.
        else:
            constructor = cls.lookup_class(specifiable_type)

            # Found in cls.__lookup_classes__.
            if constructor is not None:
                pass
            # Python callable.
            elif callable(specifiable_type):
                constructor = specifiable_type
            # A string: Filename or a python module+class.
            elif isinstance(specifiable_type, str):
                if re.search(r'\.(yaml|yml|json)$', specifiable_type):
                    return cls.from_file(specifiable_type, *ctor_args,
                                         **ctor_kwargs)
                elif specifiable_type.find('.') != -1:
                    module_name, function_name = specifiable_type.rsplit(
                        ".", 1)
                    module = importlib.import_module(module_name)
                    constructor = getattr(module, function_name)
                else:
                    raise RLGraphError(
                        "ERROR: String specifier ({}) in from_spec must be a filename, a module+class, or a key "
                        "into {}.__lookup_classes__!".format(
                            specifiable_type, cls.__name__))

        if not constructor:
            raise RLGraphError("Invalid type: {}".format(specifiable_type))

        # Create object with inferred constructor.
        specifiable_object = constructor(*ctor_args, **ctor_kwargs)
        assert isinstance(
            specifiable_object, constructor.func if isinstance(
                constructor, partial) else constructor)

        return specifiable_object
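
Stripped of the lookup-class and file-loading machinery, `from_spec` boils down to: pop the "type" hint out of the spec dict, merge explicit kwargs over the remaining spec keys, resolve the hint to a constructor, and call it with the optional `_args` list. A compact sketch of that flow against a hand-rolled registry (the registry contents and class are invented for the example):

    def from_spec_sketch(spec=None, lookup_classes=None, default_cls=None, **kwargs):
        lookup_classes = lookup_classes or {}
        if isinstance(spec, dict):
            ctor_hint = spec.pop("type", None)
            ctor_kwargs = dict(spec)
            ctor_kwargs.update(kwargs)  # explicit kwargs win over spec keys
        else:
            ctor_hint, ctor_kwargs = spec, kwargs
        ctor_args = ctor_kwargs.pop("_args", [])

        if ctor_hint is None:
            constructor = default_cls
        elif ctor_hint in lookup_classes:
            constructor = lookup_classes[ctor_hint]
        elif callable(ctor_hint):
            constructor = ctor_hint
        else:
            raise ValueError("Invalid type: {}".format(ctor_hint))
        return constructor(*ctor_args, **ctor_kwargs)

    class AdamSketch(object):
        def __init__(self, learning_rate=0.001):
            self.learning_rate = learning_rate

    opt = from_spec_sketch({"type": "adam", "learning_rate": 0.01},
                           lookup_classes={"adam": AdamSketch})
    print(type(opt).__name__, opt.learning_rate)  # AdamSketch 0.01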