Example #1
    def get_tensorflow_object_last(self, dst_units):
        """
        Parameters
        ----------
        dst_units:    int
            Number of units that the recurrent cell will have
        """
        try:
            c_ = getattr(tf.keras.layers, self.type)
        except Exception:
            print_failure(
                "The layer of layer_type '" + self.type +
                "' is not a valid tf.keras layer. Please check the documentation "
                "for the correct way to define this layer.")

        self.parameters['units'] = dst_units  # can we assume that it will always be units??

        try:
            layer = c_(**self.parameters)
        except Exception:
            parameters_string = ''
            for k, v in self.parameters.items():
                parameters_string += k + ': ' + str(v) + '\n'
            print_failure(
                "One of the parameters passed to the layer of layer_type '" + self.type + "' is incorrect. \n " +
                "You have defined the following parameters: \n" + parameters_string)

        return layer
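
For illustration, a minimal standalone sketch of the same dynamic-construction pattern (the layer type and parameters here are hypothetical, not taken from IGNNITION):

import tensorflow as tf

# Resolve a Keras layer class from its string name and instantiate it from a parameter dict.
layer_type = 'Dense'                         # e.g. read from a YAML model description
parameters = {'activation': 'relu'}

c_ = getattr(tf.keras.layers, layer_type)    # raises AttributeError if the name is not a Keras layer
parameters['units'] = 16                     # the destination dimension sets 'units'
layer = c_(**parameters)                     # raises TypeError on invalid keyword arguments
print(layer.get_config()['units'])           # -> 16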
Example #2
    def calculate(self, product_input1, product_input2):
        """
        Parameters
        ----------
        product_input1:    tensor
           Input 1
        product_input2:    tensor
           Input 2
        """

        try:
            if self.type_product == 'dot_product':
                result = tf.tensordot(product_input1, product_input2, axes=[[1], [1]])

                # the correct values are in the diagonal (IMPROVE THIS)
                # This does the dot product row by row (so independently for each adjacency)
                result = tf.linalg.tensor_diag_part(result)
                result = tf.expand_dims(result, axis=-1)

            elif self.type_product == 'element_wise':
                result = tf.math.multiply(product_input1, product_input2)

            elif self.type_product == 'mat_mult':
                result = tf.tensordot(product_input1, product_input2, axes=[[2], [1]])
                result = tf.squeeze(result, axis=2)

            result = tf.cast(result, tf.float32)

            return result

        except Exception:
            print_failure(
                'The product operation between ' + str(product_input1) + ' and ' + str(product_input2) +
                ' failed. Check that the dimensions are compatible.')
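
For reference, the dot_product branch amounts to a row-wise dot product; a minimal sketch (assuming rank-2 inputs of shape [n, d]) showing that the tensordot-plus-diagonal trick matches a direct reduction that avoids the n x n intermediate:

import tensorflow as tf

a = tf.constant([[1., 2.], [3., 4.]])
b = tf.constant([[5., 6.], [7., 8.]])

# tensordot over axis 1 followed by taking the diagonal ...
full = tf.tensordot(a, b, axes=[[1], [1]])                       # shape [2, 2]
via_diag = tf.expand_dims(tf.linalg.tensor_diag_part(full), -1)  # [[17.], [53.]]

# ... equals a direct row-wise reduction
direct = tf.reduce_sum(a * b, axis=1, keepdims=True)             # [[17.], [53.]]

tf.debugging.assert_near(via_diag, direct)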
Example #3
    def get_tensorflow_object(self, dst_dim):
        """
        Parameters
        ----------
        dst_dim:    int
            Dimension of the destination nodes. Thus, number of units of the RNN model
        """

        self.parameters['units'] = dst_dim
        try:
            c_ = getattr(tf.keras.layers, self.type + 'Cell')
        except Exception:
            print_failure(
                "Error when trying to define an RNN of layer_type '" + self.type +
                "' since this layer_type does not exist. Check the valid RNN cells that Keras allows.")

        try:
            layer = c_(**self.parameters)
        except Exception:
            print_failure(
                "Error when creating the RNN of layer_type '" + self.type +
                "' since invalid parameters were passed. Check the documentation to see which "
                "parameters are acceptable, or check the spelling of the parameters' names.")

        return layer
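
A hedged standalone sketch of the same cell lookup (hypothetical layer_type; the resulting cell is wrapped in tf.keras.layers.RNN only to show it is usable):

import tensorflow as tf

layer_type = 'GRU'                                        # e.g. read from the model description
cell_cls = getattr(tf.keras.layers, layer_type + 'Cell')  # -> tf.keras.layers.GRUCell
cell = cell_cls(units=8)

rnn = tf.keras.layers.RNN(cell)
out = rnn(tf.random.normal([4, 10, 3]))                   # batch of 4 sequences, length 10, 3 features
print(out.shape)                                          # -> (4, 8)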
Example #4
    def find_total_input_dim(self, dimensions, calculations):
        """
        Parameters
        ----------
        dimensions:    dict
           Dictionary with the dimensions of each tensor (indexed by name)
        calculations:    dict
           Dictionary with the current calculations throughout the execution of the GNN model
        """
        if self.input is not None:
            input_nn = self.input
            input_dim = 0
            dimension = None
            for i in input_nn:
                if '_initial_state' in i:
                    i = i.split('_initial_state')[0]

                if i in dimensions:
                    dimension = dimensions[i]
                elif i + '_out_dim' in calculations:
                    dimension = calculations[i + '_out_dim']  # take the dimension from here or from self.dimensions
                else:
                    print_failure("Keyword " + i + " used in the model definition was not recognized")

                input_dim += dimension
            return input_dim
Example #5
    def stream_read_json(self, f):
        """
        Parameters
        ----------
        f:    file object
            Open dataset file containing a JSON array of objects
        """
        # check that it is a valid array of objects
        pos1 = f.read(1)
        if pos1 != '[':
            print_failure(
                "Error: the dataset files must contain an array of JSON objects, not single JSON objects.")

        start_pos = 1
        while True:
            try:
                obj = json.load(f)
                yield obj
                return
            except json.JSONDecodeError as e:
                f.seek(start_pos)
                json_str = f.read(e.pos)
                obj = json.loads(json_str)
                start_pos += e.pos + 1
                a = f.read(1)  # this character is either the comma separator or the closing symbol

                if a == self.end_symbol or a == ']':
                    yield obj
                    return
                yield obj
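
A simplified, self-contained sketch of the same idea (reading a JSON array object by object), using json.JSONDecoder.raw_decode on an in-memory string rather than a file handle; names are hypothetical:

import json

def iter_json_array(text):
    decoder = json.JSONDecoder()
    pos = text.index('[') + 1                # start right after the opening bracket
    while True:
        # skip whitespace and separators until the next object or the closing bracket
        while pos < len(text) and text[pos] in ' \t\r\n,':
            pos += 1
        if pos >= len(text) or text[pos] == ']':
            return
        obj, pos = decoder.raw_decode(text, pos)
        yield obj

sample = '[{"a": 1}, {"a": 2}, {"a": 3}]'
print(list(iter_json_array(sample)))         # -> [{'a': 1}, {'a': 2}, {'a': 3}]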
Example #6
    def obtain_total_input_dim_message(self, dimensions, calculations, dst_name, src):
        """
        Parameters
        ----------
        dimensions:    dict
           Dictionary with the dimensions of each tensor (indexed by name)
        calculations:    dict
           Dictionary with the current calculations throughout the execution of the GNN model
        dst_name: str
            Name of the destination entity
        src: Source_mp object
            Object that includes the information about the source entity of the mp
        """
        # Find out the dimension of the model
        input_nn = self.input
        input_dim = 0
        for i in input_nn:
            if i == 'source':
                input_dim += int(dimensions.get(src.name))
            elif i == 'destination':
                input_dim += int(dimensions.get(dst_name))
            elif i in dimensions:
                input_dim += int(dimensions[i])
            elif i + '_dim' in calculations:
                input_dim += int(calculations[i + '_dim'])
            else:
                print_failure("Keyword " + i + " used in the message passing was not recognized.")

        return input_dim
Example #7
    def calculate(self, src_states, adj_src, dst_states, adj_dst):
        """
        Parameters
        ----------
        src_states:    tensor
           States of the source nodes
        adj_src:    tensor
            Adjacency list src -> dst
        dst_states:     tensor
            States of the destination nodes
        adj_dst:    tensor
            Adjacency list dst -> src
        """

        # obtain the extended input (by extending it to the number of adjacencies between them)
        try:
            extended_src = tf.gather(src_states, adj_src)
        except Exception:
            print_failure('Extending the adjacency list ' + str(self.adj_list) +
                          ' was not possible. Check that the indexes of the source of the adjacency '
                          'list match the input given.')

        try:
            extended_dst = tf.gather(dst_states, adj_dst)
        except Exception:
            print_failure('Extending the adjacency list ' + str(self.adj_list) +
                          ' was not possible. Check that the indexes of the destination of '
                          'the adjacency list match the input given.')

        return extended_src, extended_dst
Example #8
    def __prepocess_parameters(self):
        for k, v in self.parameters.items():
            if v == 'None':
                self.parameters[k] = None

            elif v == 'True':
                self.parameters[k] = True

            elif v == 'False':
                self.parameters[k] = False

            elif 'regularizer' in k:
                try:
                    self.parameters[k] = tf.keras.regularizers.l2(float(self.parameters.get(k)))
                except Exception:
                    print_failure("The " + k + " parameter '" + str(self.parameters.get(k)) +
                                  "' in layer of layer_type " + self.type +
                                  " is invalid. Please make sure it is a numerical value.")

            elif 'activation' in k:  # already ensures that it was not None
                try:
                    self.parameters[k] = getattr(tf.nn, v)
                except Exception:
                    print_failure("The activation '" + v +
                                  "' is not a valid function from the tf.nn library. Please check the documentation "
                                  "and the spelling of the function.")
Example #9
    def __init__(self, op):
        """
        Parameters
        ----------
        op:    dict
            Dictionary with the data defining this general operation
        """

        self.type = op.get('type')
        self.output_name = op.get('output_name', None)

        self.output_label = op.get('output_label', None)

        if self.output_label is not None:
            # There may be more than one output_label
            self.output_label = [output.split('$')[-1] for output in
                                 self.output_label]  # delete the $ from the output label

        # parse the input of the operation
        self.input = []
        self.source_dataset = False
        self.destination_dataset = False
        if 'input' in op:
            for input_item in op.get('input'):
                if '$source' == input_item or '$destination' == input_item:
                    print_failure(
                        'The keywords source and destination are reserved. Thus, they cannot be used to name a '
                        'feature from the dataset. Check that you really meant to use $, indicating that it is a '
                        'feature from the dataset.')
                else:
                    self.input.append(input_item.split('$')[-1])  # delete the $ from the inputs (if any)
Example #10
    def create_aggregations(self, attrs):
        """
        Parameters
        ----------
        attrs:    dict
            Dictionary with the required attributes for the aggregation (defining the set of operations)
        """
        aggregations = []
        single_embedding = None
        multiple_embedding = None
        for attr in attrs:
            attr_type = attr.get('type')
            if attr_type == 'interleave':
                aggregations.append(InterleaveAggr(attr))
                multiple_embedding = True
            elif attr_type == 'neural_network':
                aggregations.append(
                    FeedForwardOperation(attr, model_role='aggregation'))
                single_embedding = True
            elif attr_type == 'concat':
                aggregations.append(ConcatAggr(attr))
                multiple_embedding = True
            elif attr_type == 'sum':
                aggregations.append(SumAggr(attr))
                single_embedding = True
            elif attr_type == 'mean':
                aggregations.append(MeanAggr(attr))
                single_embedding = True
            elif attr_type == 'min':
                aggregations.append(MinAggr(attr))
                single_embedding = True
            elif attr_type == 'max':
                aggregations.append(MaxAggr(attr))
                single_embedding = True
            elif attr_type == 'std':
                aggregations.append(StdAggr(attr))
                single_embedding = True
            elif attr_type == 'attention':
                aggregations.append(AttentionAggr(attr))
                single_embedding = True
            elif attr_type == 'edge_attention':
                aggregations.append(EdgeAttentionAggr(attr))
                single_embedding = True
            elif attr_type == 'convolution':
                aggregations.append(ConvAggr(attr))
                single_embedding = True
            else:  # this is for the ordered aggregation
                multiple_embedding = True

        if single_embedding and multiple_embedding:
            print_failure(
                "You cannot combine aggregations that return a sequence of tensors "
                "with aggregations that return a single embedding.")

        elif single_embedding:
            return aggregations, 0

        else:
            return aggregations, 1
Example #11
    def generate_from_array(self,
                            data_samples,
                            entity_names,
                            feature_names,
                            output_names,
                            adj_names,
                            interleave_names,
                            additional_input,
                            training,
                            shuffle=False):
        """
        Parameters
        ----------
        data_samples:    [array]
           Array of samples to be processed
        entity_names: [array]
            Name of the entities to be found in the dataset
        feature_names:    [array]
           Name of the features to be found in the dataset
        output_names:    [str]
           Names of the output data to be found in the dataset
        adj_names:    [array]
           Names of the adjacencies to be found in the dataset
        interleave_names:    [array]
           First parameter is the name of the interleave, and the second the destination entity
        additional_input:    [array]
           Names of other vectors that need to be retrieved because they appear in other parts of the model definition
        training:     bool
            Indicates whether we are training, and thus a label is required.
        shuffle:    bool
           Shuffle parameter of the dataset
        """

        data_samples = [json.loads(x) for x in data_samples]
        self.entity_names = [x for x in entity_names]
        self.feature_names = [x for x in feature_names]
        self.output_names = output_names
        self.adj_names = adj_names
        self.interleave_names = [[i[0], i[1]] for i in interleave_names]
        self.additional_input = [x for x in additional_input]
        self.training = training

        for sample in data_samples:
            try:
                processed_sample = self.__process_sample(sample)
                yield processed_sample

            except StopIteration:
                pass

            except KeyboardInterrupt:
                sys.exit()

            except Exception as inf:
                print_failure("\n There was an unexpected error: \n" + str(inf) +
                              "\n Please make sure that all the names used in the sample match the model definition.")
Example #12
    def calculate(self, inputs):
        """
        Parameters
        ----------
        inputs:    [tensor]
            List of tensors to concatenate
        """

        try:
            result = tf.concat(inputs, axis=self.axis)
            return result

        except Exception:
            print_failure(
                'The concat operation failed. Check that the dimensions are compatible.')
            sys.exit(1)
Example #13
    def __read_yaml(self, path, file_name=''):
        """
        Parameters
        ----------
        path:    str
            Path of the YAML file with the model description
        file_name: str
            Name of the file we aim to read
        """
        if os.path.isfile(path):
            with open(path, 'r') as stream:
                try:
                    return yaml.safe_load(stream)
                except yaml.YAMLError as exc:
                    print_failure("There was the following error in the " + file_name + " file.\n" + str(exc))
        else:
            print_failure("The " + file_name + " file was not found in: " + path)
Example #14
    def get_tensorflow_object(self):
        try:
            c_ = getattr(tf.keras.layers, self.type)
        except Exception:
            print_failure(
                "The layer of layer_type '" + self.type +
                "' is not a valid tf.keras layer. Please check the documentation for the "
                "correct way to define this layer.")

        try:
            layer = c_(**self.parameters)
        except Exception:
            parameters_string = ''
            for k, v in self.parameters.items():
                parameters_string += k + ': ' + str(v) + '\n'
            print_failure(
                "One of the parameters passed to the layer of layer_type '" + self.type + "' is incorrect. \n " +
                "You have defined the following parameters: \n" + parameters_string)

        return layer
Example #15
    def construct_tf_model(self, input_dim, dst_dim=None, is_readout=False, dst_name=None):
        """
        Parameters
        ----------
        input_dim:  int
            Dimension of the input of the model
        dst_dim:  int
            Dimension of the destination hs if any
        is_readout: bool
            Is a model used for the readout?
        dst_name:   str
            Name of the destination entity
        """

        model = tf.keras.models.Sequential()
        model.add(tf.keras.Input(shape=input_dim))

        layer_counter = 1
        n = len(self.layers)

        for j in range(n):
            current_layer = self.layers[j]
            try:
                # if it's the last layer and we have defined an output dimension
                if j == (n - 1) and dst_dim is not None:
                    layer_model = current_layer.get_tensorflow_object_last(dst_dim)
                else:
                    layer_model = current_layer.get_tensorflow_object()

                model.add(layer_model)

            except Exception:
                if dst_dim is None:
                    if is_readout:
                        print_failure('The layer ' + str(layer_counter) +
                                      ' of the readout is not correctly defined. Check keras documentation '
                                      'to make sure all the parameters are correct.')
                    else:
                        print_failure('The layer ' + str(layer_counter) +
                                      ' of the message creation neural network in the message passing to ' +
                                      str(dst_name) + ' is not correctly defined. Check keras documentation to '
                                      'make sure all the parameters are correct.')

                else:
                    print_failure('The layer ' + str(
                        layer_counter) + ' of the update neural network in message passing to ' + str(dst_name) +
                                  ' is not correctly defined. Check keras documentation to make sure all the '
                                  'parameters are correct.')

            layer_counter += 1
        output_shape = model.output_shape[-1]
        return [model, output_shape]
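
A minimal standalone sketch of the same construction pattern (hypothetical layer stack and dimensions), forcing the last layer to the destination dimension:

import tensorflow as tf

input_dim, dst_dim = 12, 4

model = tf.keras.models.Sequential()
model.add(tf.keras.Input(shape=(input_dim,)))
model.add(tf.keras.layers.Dense(32, activation='relu'))
model.add(tf.keras.layers.Dense(dst_dim))       # the last layer gets dst_dim units

output_shape = model.output_shape[-1]
print(output_shape)                             # -> 4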
Example #16
    def __process_sample(self, sample, file=None):
        """
        Parameters
        ----------
        sample:    dict
            Input sample which is a serialized version (in JSON) of a networkx graph.
        file:    str
            Path to the file (useful for error-reporting purposes)
        """
        # load the model
        G = json_graph.node_link_graph(sample)

        # Only directed graphs are supported. Error checking message if the graph is undirected
        if not G.is_directed():
            print_failure("IGNNITION received as input an undirected graph, even though it only supports "
                          "(at the moment) directed graphs. Please consider reformating your code accordingly. "
                          "You can double the edges between two nodes (e.g., edge 1-2 can be transformed into 1->2 "
                          "and 2->1) to simulate the same behaviour.")

        if G.is_multigraph():
            print_failure("IGNNITION received as input a multigraph, while these are not yet supported. This means, "
                          "that for every pair of nodes, only one edge with the same source and destination can exist "
                          "(e.g., you cannot have two edges 1->2 and 1->2. Notice that 1->2 and 2->1 does not incur "
                          "in this problem.")

        entity_counter = {}
        mapping = {}
        data = {}

        for name in self.entity_names:
            entity_counter[name] = 0

        list_nodes = list(G.nodes())
        for node_name in list_nodes:
            attributes = G.nodes[node_name]

            if 'entity' not in attributes:
                print_failure(
                    "Error in the dataset file located in '" + file + "'. The node named '" + str(node_name) +
                    "' was not assigned an entity.")

            entity_name = attributes['entity']
            new_node_name = entity_name + '_{}'
            num_node = entity_counter[entity_name]
            entity_counter[entity_name] += 1

            mapping[node_name] = new_node_name.format(num_node)

        # save the number of nodes of each entity
        for name in self.entity_names:
            data['num_' + name] = entity_counter[name]

        # rename the nodes to a mapping that also indicates their entity layer_type
        D_G = nx.relabel_nodes(G, mapping)

        # discard if the graph is empty
        if not D_G.edges():
            print_info("\nA sample was discarded because the graph is empty (has no edges).")
            raise StopIteration

        # load the features (all the features are set to be lists. So we always return a list of lists)
        for f in self.feature_names:
            try:
                features_dict = nx.get_node_attributes(D_G, f)
                feature_vals = np.array(list(features_dict.values()))
                entity_names = set([name.split('_')[0] for name in features_dict.keys()])  # indicates the (unique)
                # names of the entities that have that feature

                if len(entity_names) > 1:
                    entities_string = functools.reduce(lambda x, y: str(x) + ',' + str(y), entity_names)
                    print_failure("The feature " + f + " was defined in several entities (" + entities_string +
                                  "). The feature names should be unique for each layer_type of node.")

                # it should always be a 2d array
                if len(np.shape(feature_vals)) == 1:
                    feature_vals = np.expand_dims(feature_vals, axis=-1)

                if feature_vals.size == 0:
                    message = "The feature " + f + " was used in the model_description.yaml file " \
                                                   "but was not defined in the dataset."
                    if file is not None:
                        message = "Error in the dataset file located in '" + file + ".\n" + message
                    raise Exception(message)
                else:
                    data[f] = feature_vals

            except Exception:
                message = "The feature " + f + " was used in the model_description.yaml file " \
                                               "but was not defined in the dataset."
                if file is not None:
                    message = "Error in the dataset file located in '" + file + ".\n" + message
                raise Exception(message)

        # take other inputs if needed (check that they might be global features)
        for a in self.additional_input:
            # 1) try to see if this name has been defined as a node attribute
            node_dict = nx.get_node_attributes(D_G, a)
            node_attr = np.array(list(node_dict.values()))
            entity_names = set([name.split('_')[0] for name in node_dict.keys()])  # indicates the (unique) names
            # of the entities that have that feature

            if len(entity_names) > 1:
                entities_string = functools.reduce(lambda x, y: str(x) + ',' + str(y), entity_names)
                print_failure(
                    "The feature " + a + " was defined in several entities (" + entities_string +
                    "). The feature names should be unique for each layer_type of node.")

            # it should always be a 2d array
            if len(np.shape(node_attr)) == 1:
                node_attr = np.expand_dims(node_attr, axis=-1)

            # 2) try to see if this name has been defined as an edge feature
            edge_dict = nx.get_edge_attributes(D_G, a)
            edge_attr = np.array(list(edge_dict.values()))
            entity_names = set([(pair[0].split('_')[0], pair[1].split('_')[0]) for pair in edge_dict.keys()])
            # indicates the (unique) names of the entities that have that feature
            # obtain the entities, with a small token indicating if it is source or destination

            # Problem: When we transform an undirected graph to directed, we double all the edges. Hence,
            # we still need to differentiate between source and destination entities?? Solution: Allow only directed??

            # for now, check that the name is unique for every src-dst. Problem: One node connected to another but
            # the reverse to other nodes??
            if len(entity_names) > 2:
                #print(entity_names)
                entities_string = functools.reduce(lambda x, y: str(x) + ',' + str(y), entity_names)
                print_failure(
                    "The edge feature " + a + " was defined connecting two different source-destination entities (" +
                    entities_string + "). Make sure that an edge feature is unique for a given pair of entities "
                                      "(types of nodes).")


            # it should always be a 2d array
            if len(np.shape(edge_attr)) == 1:
                edge_attr = np.expand_dims(edge_attr, axis=-1)


            # 3) try to see if this name has been defined as a graph feature
            graph_attr = [D_G.graph[a]] if a in D_G.graph else []

            # Check that this name has not been defined both as node features and as edge_features
            if node_attr.size != 0 and edge_attr.size != 0 and len(graph_attr) != 0:
                print_failure("The feature " + a + "was defined both as node feature, edge feature and graph feature. "
                                                   "Please use unique names in this case.")
            elif node_attr.size != 0 and edge_attr.size != 0:
                print_failure("The feature " + a + "was defined both as node feature and as edge feature. Please use "
                                                   "unique names in this case.")
            elif node_attr.size != 0 and len(graph_attr) != 0:
                print_failure("The feature " + a + "was defined both as node feature and as graph feature. Please use "
                                                   "unique names in this case.")


            # Return the correct value
            if node_attr.size != 0:
                data[a] = node_attr
            elif edge_attr.size != 0:
                data[a] = edge_attr
            elif a in D_G.graph:
                data[a] = graph_attr
            else:
                message = 'The data named "' + a + '" was used in the model_description.yaml file ' \
                                                   'but was not defined in the dataset.'
                if file is not None:
                    message = "Error in the dataset file located in '" + file + ".\n" + message
                raise Exception(message)

        if self.training:
            # collect the output (if there is more than one, concatenate them on axis=1)
            # limitation: all the outputs must be of the same layer_type (same number of elements)
            final_output = []
            for output in self.output_names:
                try:
                    aux = list(nx.get_node_attributes(D_G, output).values())
                    if not aux:  # When having global/graph-level output
                        aux = D_G.graph[output]
                        aux = aux if isinstance(aux, list) else [aux]

                except Exception:
                    print_failure(
                        f"Error when trying to get output with name: {output}. "
                        "Check the data which corresponds to the output_label in the readout block."
                    )

                # if it is a 1d array, transform it into a 2d array
                if len(np.array(aux).shape) == 1:
                    aux = np.expand_dims(aux, -1)

                # try to concatenate them together. If error, it means that the two labels are incompatible
                final_output.extend(aux)
                data['__ignnition_{}_len'.format(output)] = len(aux)

        # find the adjacencies
        edges_list = list(D_G.edges())
        processed_neighbours = {}

        # create the adjacency lists that we are required to pass
        for adj_name_item in self.adj_names:
            src_entity = adj_name_item.split('_to_')[0]
            dst_entity = adj_name_item.split('_to_')[1]

            data['src_' + src_entity + '_to_' + dst_entity] = []
            data['dst_' + src_entity + '_to_' + dst_entity] = []
            data['seq_' + src_entity + '_to_' + dst_entity] = []

        for e in edges_list:
            src_node, dst_node = e
            src_num = int(src_node.split('_')[-1])
            dst_num = int(dst_node.split('_')[-1])
            src_entity = D_G.nodes[src_node]['entity']
            dst_entity = D_G.nodes[dst_node]['entity']
            if dst_node not in processed_neighbours:
                processed_neighbours[dst_node] = 0

            if src_entity + '_to_' + dst_entity in self.adj_names:
                data['src_' + src_entity + '_to_' + dst_entity].append(src_num)
                data['dst_' + src_entity + '_to_' + dst_entity].append(dst_num)
                data['seq_' + src_entity + '_to_' + dst_entity].append(processed_neighbours[dst_node])

                processed_neighbours[dst_node] += 1  # this is useful to check which sequence number to use


        # check that the dataset contains all the adjacencies needed
        if not self.warnings_shown:
            for adj_name_item in self.adj_names:
                if data['src_' + adj_name_item] == []:
                    src_entity = adj_name_item.split('_to_')[0]
                    dst_entity = adj_name_item.split('_to_')[1]
                    print_info(
                        "WARNING: The GNN definition uses edges between " + src_entity + " and " + dst_entity +
                        " but these were not found in the input graph. The MP defined between these two entities "
                        "will be ignored.\nIn case the graph ought to contain such edges, one reason for this error "
                        "is a mistake in defining the graph as directional, when the edges have been defined as "
                        "undirected. Please check the documentation.")
                    self.warnings_shown = True

        # this collects the sequence for the interleave aggregation (if any)
        for i in self.interleave_names:
            name, dst_entity = i
            interleave_definition = list(D_G.graph[name].values())  # this must be a graph variable

            involved_entities = {}
            total_sequence = []
            total_size, n_total, counter = 0, 0, 0

            for src_entity in interleave_definition:
                total_size += 1
                if src_entity not in involved_entities:
                    involved_entities[src_entity] = counter  # each entity a different value (identifier)

                    seq = data['seq_' + src_entity + '_to_' + dst_entity]
                    n_total += max(seq) + 1  # superior limit of the size of any destination
                    counter += 1

                # obtain all the original definition in a numeric format
                total_sequence.append(involved_entities[src_entity])

            # we exceed the maximum length so that it becomes a multiple of total_size; then we cut it
            repetitions = math.ceil(float(n_total) / total_size)
            result = np.array((total_sequence * repetitions)[:n_total])

            for entity in involved_entities:
                id = involved_entities[entity]
                data['indices_' + entity + '_to_' + dst_entity] = np.where(result == id)[0].tolist()

        if self.training:
            return data, final_output
        else:
            return data
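
A hedged sketch of the kind of sample this method expects: a directed node-link graph where every node carries an 'entity' attribute plus its features (the entity and feature names here are hypothetical):

import networkx as nx
from networkx.readwrite import json_graph

G = nx.DiGraph()
G.add_node('p0', entity='path', traffic=0.3)
G.add_node('l0', entity='link', capacity=10.0)
G.add_edge('p0', 'l0')
G.add_edge('l0', 'p0')

sample = json_graph.node_link_data(G)      # serializable dict, as stored in the dataset files
H = json_graph.node_link_graph(sample)     # round-trip, as done at the start of __process_sample
print(H.is_directed(), H.nodes['p0']['entity'])   # -> True path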
Example #17
    def __validate_model_description(self, data):
        """
        Parameters
        ----------
        data:    dict
           Dictionary with the initial data
        """

        entities = data['entities']
        stages = data['message_passing']['stages']

        src_names, dst_names, called_nn_names, input_names = [], [], [], []
        output_names = ['source', 'destination']

        # check the hidden state creation
        for entity_item in entities:
            state_ops = entity_item['initial_state']
            for op in state_ops:
                if 'input' in op:
                    input_names += op['input']
                if 'output' in op:
                    output_names += op['output']

        # check the message passing
        for stage in stages:
            stage_mp = stage.get('stage_message_passings')
            for mp in stage_mp:  # for every message-passing
                dst_names.append(mp.get('destination_entity'))
                sources = mp.get('source_entities')

                # check the message functions
                for src in sources:
                    src_names.append(src.get('name'))
                    messages = src.get('message', None)
                    if messages is not None:
                        for op in messages:  # for every operation
                            if op.get('type') == 'neural_network':
                                called_nn_names.append(op.get('nn_name'))
                                input_names += op.get('input')

                            if 'output_name' in op:
                                output_names.append(op.get('output_name'))

                # check the aggregation functions
                aggregations = mp.get('aggregation')
                for aggr in aggregations:
                    if aggr.get('type') == 'neural_network':
                        input_names += aggr.get('input')

                    if 'output_name' in aggr:
                        output_names.append(aggr.get('output_name'))

        readout_op = data.get('readout')
        called_nn_names += [op.get('nn_name') for op in readout_op if op.get('type') == 'neural_network']

        if 'output_label' not in readout_op[-1]:
            print_failure('The last operation of the readout MUST contain the definition of the output_label')
        else:
            input_names += readout_op[-1]['output_label']

        # now check the entities
        entity_names = [a.get('name') for a in data.get('entities')]
        nn_names = [n.get('nn_name') for n in data.get('neural_networks')]
        # check that no two defined NNs share the same name
        if len(nn_names) != len(set(nn_names)):
            print_failure("The names of two NN are repeated. Please ensure that each NN has a unique name.")

        # check the source entities
        for a in src_names:
            if a not in entity_names:
                print_failure(
                    'The source entity "' + a + '" was used in a message passing. However, there is no such entity. \n '
                                                'Please check the spelling or define a new entity.')

        # check the destination entities
        for d in dst_names:
            if d not in entity_names:
                print_failure(
                    'The destination entity "' + d + '" was used in a message passing. However, there is no such '
                                                     'entity. \n Please check the spelling or define a new entity.')

        # check the nn_names
        for name in called_nn_names:
            if name not in nn_names:
                print_failure(
                    'The name "' + name + '" is used as a reference to a neural network (nn_name), even though the '
                                          'neural network was not defined. \n Please make sure the name is correctly '
                                          'spelled or define a neural network named ' + name)

        # ensure that all the inputs (that are not output of another operation) start with a $
        for i in input_names:
            if i not in output_names and i[0] != '$':
                print_failure('The input name ' + i + ' references data from the dataset but does not start with $')

        for i in output_names:
            if i[0] == '$':
                print_failure(
                    'The keyword ' + i + ' starts with $ even though it does not represent data from the dataset.')