def output_data_definitions(self):
    """
    Function returns a dictionary with definitions of output data produced by the component.

    :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    return {
        self.key_indices: DataDefinition([-1, 1], [list, int], "Batch of sample indices [BATCH_SIZE] x [1]"),
        self.key_inputs: DataDefinition([-1, 1], [list, str], "Batch of sentences, each being a single string (many words) [BATCH_SIZE x SENTENCE]"),
        # Fixed malformed description: original was missing the opening bracket ("... BATCH_SIZE x WORD]").
        self.key_targets: DataDefinition([-1, 1], [list, str], "Batch of targets, each being a single label (word) [BATCH_SIZE x WORD]"),
        }
def output_data_definitions(self):
    """
    Defines the streams produced by the component: sample indices plus
    source and target sentences (each sentence is a list of words).

    :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    output_defs = {}
    output_defs[self.key_indices] = DataDefinition(
        [-1, 1], [list, int],
        "Batch of sample indices [BATCH_SIZE] x [1]")
    output_defs[self.key_sources] = DataDefinition(
        [-1, self.sentence_length, 1], [list, list, str],
        "Batch of input sentences, each consisting of several words [BATCH_SIZE] x [SENTENCE_LENGTH] x [string]")
    output_defs[self.key_targets] = DataDefinition(
        [-1, self.sentence_length, 1], [list, list, str],
        "Batch of target sentences, each consisting of several words [BATCH_SIZE] x [SENTENCE_LENGTH] x [string]")
    return output_defs
def output_data_definitions(self):
    """
    Defines the single output stream produced by the component: the loss scalar.

    :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    loss_def = DataDefinition(
        [1], [torch.Tensor],
        "Loss value (single value for the whole batch - a scalar)")
    return {self.key_loss: loss_def}
def input_data_definitions(self):
    """
    Function returns a dictionary with definitions of input data that are required by the component.

    :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    input_defs = {
        self.key_targets: DataDefinition([-1] * self.num_targets_dims, [torch.Tensor], "Batch of targets (indices) [DIM 1 x DIM 2 x ... ]"),
        # Predictions carry one extra trailing dimension: the distribution over classes.
        # Fixed description: original said "[DIM 1 x DIM x ...]" instead of "DIM 2".
        self.key_predictions: DataDefinition([-1] * (self.num_targets_dims + 1), [torch.Tensor], "Batch of predictions, represented as tensor with probability distribution over classes [DIM 1 x DIM 2 x ... x NUM_CLASSES]"),
        }
    # The masks stream is required only when masking is enabled.
    if self.use_masking:
        input_defs[self.key_masks] = DataDefinition([-1], [torch.Tensor], "Batch of masks [BATCH_SIZE]")
    return input_defs
def input_data_definitions(self):
    """
    Defines one input stream per configured (key, dimensions) pair.

    :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    input_defs = {}
    # Each configured stream key gets a tensor definition with its associated dimensions.
    for stream_key, stream_dims in zip(self.input_stream_keys, self.input_stream_dims):
        input_defs[stream_key] = DataDefinition(
            stream_dims, [torch.Tensor],
            "Batch of inputs [BATCH_SIZE x ...]")
    return input_defs
def input_data_definitions(self):
    """
    Function returns a dictionary with definitions of input data that are required by the component.

    :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    return {
        # Expects fixed-size RGB images (3 x 224 x 224, hard-coded here).
        # Fixed description: "IMAGE WIDTH" -> "IMAGE_WIDTH" for consistency with other components.
        self.key_inputs: DataDefinition([-1, 3, 224, 224], [torch.Tensor], "Batch of images [BATCH_SIZE x IMAGE_DEPTH x IMAGE_HEIGHT x IMAGE_WIDTH]"),
        }
def output_data_definitions(self):
    """
    Function returns a dictionary with definitions of output data produced by the component.
    (Fixed docstring: the original claimed an empty dictionary, but the method
    returns the definition of the outputs stream.)

    :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    return {
        # NOTE(review): trailing dimension reuses self.input_size — presumably the component
        # preserves the item size; confirm against its forward pass.
        self.key_outputs: DataDefinition([-1] * (self.num_inputs_dims - 1) + [self.input_size], [torch.Tensor], "Batch of outputs [DIM 1 x DIM 2 x ... x INPUT_SIZE]")
        }
def input_data_definitions(self):
    """
    Defines the single input stream required by the component: nested lists
    of tensors with a fixed trailing item size.

    :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    # All leading dimensions are dynamic lists; only the last one is a fixed-size tensor.
    num_leading = self.num_inputs_dims - 1
    dims = [-1] * num_leading + [self.input_size]
    types = [list] * num_leading + [torch.Tensor]
    return {
        self.key_inputs: DataDefinition(dims, types, "Batch of inputs [DIM 1 x DIM 2 x ... x INPUT_SIZE]")
        }
def output_data_definitions(self):
    """
    Defines the single output stream: sentences encoded as index tensors.

    :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    indices_def = DataDefinition(
        [-1, -1], [torch.Tensor],
        "Batch of sentences represented as a single tensor of indices [BATCH_SIZE x SEQ_LENGTH]")
    return {self.key_outputs: indices_def}
def input_data_definitions(self):
    """
    Defines the single input stream required by the component: a batch of
    strings, each treated as one vocabulary entry.

    :return: dictionary containing input data definitions (each of type :py:class:`ptp.data_types.DataDefinition`).
    """
    strings_def = DataDefinition(
        [-1, 1], [list, str],
        "Batch of strings, each being treated as a single 'vocabulary entry' (word) [BATCH_SIZE] x [STRING]")
    return {self.key_strings: strings_def}
def input_data_definitions(self):
    """
    Defines the single input stream required by the component: sentences
    represented as lists of fixed-size vectors.

    :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    sentences_def = DataDefinition(
        [-1, -1, self.bow_size], [list, list, torch.Tensor],
        "Batch of sentences, each represented as a list of vectors [BATCH_SIZE] x [SEQ_LENGTH] x [ITEM_SIZE] (agnostic to item size)")
    return {self.key_inputs: sentences_def}
def input_data_definitions(self):
    """
    Defines the single input stream required by the component: encoded questions.

    :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    encodings_def = DataDefinition(
        [-1, -1, self.question_encoding_size], [torch.Tensor],
        "Batch of encoded questions [BATCH_SIZE x SEQ_LEN x QUESTION_ENCODING_SIZE]")
    return {self.key_question_encodings: encodings_def}
def output_data_definitions(self):
    """
    Defines the single output stream produced by the component: embedded sentences.

    :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    embedded_def = DataDefinition(
        [-1, -1, self.embeddings_size], [torch.Tensor],
        "Batch of embedded sentences [BATCH_SIZE x SENTENCE_LENGTH x EMBEDDING_SIZE]")
    return {self.key_outputs: embedded_def}
def input_data_definitions(self):
    """
    Defines the single input stream required by the component: label strings.

    :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    labels_def = DataDefinition(
        [-1, 1], [list, str],
        "Batch of labels (words), each represented as a single string [BATCH_SIZE] x [string]")
    return {self.key_inputs: labels_def}
def output_data_definitions(self):
    """
    Function returns a dictionary with definitions of output data produced by the component.
    The definition depends on the operation mode: tokenization produces lists of
    words, detokenization produces single strings.

    :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    # Idiom fix: compare booleans with `not`, never `== False` (PEP 8).
    if not self.mode_detokenize:
        # Tokenization mode: sentence -> list of words.
        return {
            self.key_outputs: DataDefinition([-1, -1, 1], [list, list, str], "Batch of tokenized sentences, each represented as a list of words [BATCH_SIZE] x [SEQ_LENGTH] x [string]")
            }
    else:
        # Detokenization mode: list of words -> single string.
        return {
            self.key_outputs: DataDefinition([-1, 1], [list, str], "Batch of sentences, each represented as a single string [BATCH_SIZE] x [string]")
            }
def input_data_definitions(self):
    """
    Function returns a dictionary with definitions of input data that are required by the component.
    The returned streams depend on the input mode and on whether the initial
    hidden state is passed from another component.

    :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    d = {}
    # Input stream - depends on the input_mode.
    if self.input_mode == "Dense":
        d[self.key_inputs] = DataDefinition([-1, -1, self.input_size], [torch.Tensor], "Batch of inputs, each being a sequence of items [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]")
    elif self.input_mode == "Autoregression_First":
        # Only the first item is fed - no sequence dimension here.
        # Fixed description: original claimed "[BATCH_SIZE x SEQ_LEN x INPUT_SIZE]" for a 2-D tensor.
        d[self.key_inputs] = DataDefinition([-1, self.input_size], [torch.Tensor], "Batch of inputs, each being a single item [BATCH_SIZE x INPUT_SIZE]")
    #else: Autoregression_None: no inputs.

    # Input hidden state - required only when initial state comes from another component.
    if self.initial_state == "Input":
        if self.cell_type == "LSTM":
            # LSTM passes two states (h0/c0) - hence the extra dimension of size 2.
            # Fixed descriptions below: removed spurious "SEQ_LEN" not present in the declared dims.
            d[self.key_input_state] = DataDefinition([-1, 2, self.num_layers, self.hidden_size], [torch.Tensor], "Batch of LSTM last hidden states (h0/c0) passed from another LSTM that will be used as initial [BATCH_SIZE x 2 x NUM_LAYERS x HIDDEN_SIZE]")
        else:
            d[self.key_input_state] = DataDefinition([-1, self.num_layers, self.hidden_size], [torch.Tensor], "Batch of RNN last hidden states passed from another RNN that will be used as initial [BATCH_SIZE x NUM_LAYERS x HIDDEN_SIZE]")
    return d
def output_data_definitions(self):
    """
    Defines the single output stream produced by the component: label indices.

    :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    labels_def = DataDefinition(
        [-1], [torch.Tensor],
        "Batch of labels, each represented as a single index [BATCH_SIZE]")
    return {self.key_outputs: labels_def}
def input_data_definitions(self):
    """
    Defines the single input stream required by the component: word vectors.

    :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    words_def = DataDefinition(
        [-1, -1], [torch.Tensor],
        "Batch of words, each represented as a vector (probability distribution) [BATCH_SIZE x ITEM_SIZE] (agnostic to item size)")
    return {self.key_inputs: words_def}
def output_data_definitions(self):
    """
    Defines the single output stream produced by the component: word strings.

    :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    words_def = DataDefinition(
        [-1, 1], [list, str],
        "Batch of words, each represented as a single string [BATCH_SIZE] x [string]")
    return {self.key_outputs: words_def}
def output_data_definitions(self):
    """
    Defines the single output stream produced by the component: bag-of-words vectors.

    :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    bow_def = DataDefinition(
        [-1, self.bow_size], [torch.Tensor],
        "Batch of sentences, each represented as a single vector [BATCH_SIZE x ITEM_SIZE] (agnostic to item size)")
    return {self.key_outputs: bow_def}
def output_data_definitions(self):
    """
    Defines the single output stream produced by the component: class predictions.

    :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    predictions_def = DataDefinition(
        [-1, self.prediction_size], [torch.Tensor],
        "Batch of predictions, each represented as probability distribution over classes [BATCH_SIZE x PREDICTION_SIZE]")
    return {self.key_predictions: predictions_def}
def output_data_definitions(self):
    """
    Defines the single output stream produced by the component: sentences as
    lists of one-hot-sized vectors over the vocabulary.

    :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    # Trailing dimension equals the vocabulary size held in the word-to-index mapping.
    vocab_size = len(self.word_to_ix)
    sentences_def = DataDefinition(
        [-1, -1, vocab_size], [list, list, torch.Tensor],
        "Batch of sentences, each represented as a list of vectors [BATCH_SIZE] x [SEQ_LENGTH] x [VOCABULARY_SIZE]")
    return {self.key_outputs: sentences_def}
def input_data_definitions(self):
    """
    Defines the single input stream required by the component.

    :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    inputs_def = DataDefinition(
        [-1, -1, self.input_size], [torch.Tensor],
        "Batch of inputs, each represented as index [BATCH_SIZE x SEQ_LEN x INPUT_SIZE]")
    return {self.key_inputs: inputs_def}
def output_data_definitions(self):
    """
    Defines the single output stream produced by the component; its format
    depends on the operation mode (reverse mapping yields word lists,
    otherwise an index tensor).

    :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    if self.mode_reverse:
        # Reverse mode: indices -> words.
        outputs_def = DataDefinition(
            [-1, -1, 1], [list, list, str],
            "Batch of sentences, each represented as a list of words [BATCH_SIZE] x [SEQ_LENGTH] x [string]")
    else:
        # Forward mode: words -> index tensor.
        outputs_def = DataDefinition(
            [-1, -1], [torch.Tensor],
            "Batch of sentences represented as a single tensor of indices of particular words [BATCH_SIZE x SEQ_LENGTH]")
    return {self.key_outputs: outputs_def}
def input_data_definitions(self):
    """
    Defines the input streams required by the component: one prediction stream
    per configured key (sized by its vocabulary) plus optional mask streams.

    :return: dictionary containing input data definitions (each of type :py:class:`ptp.data_types.DataDefinition`).
    """
    # Prediction streams: number of classes equals the size of the associated
    # word-mapping vocabulary.
    input_defs = {
        stream_key: DataDefinition(
            [-1, len(self.input_ix_to_word[i])], [torch.Tensor],
            "Batch of predictions, represented as tensor with probability distribution over classes [BATCH_SIZE x NUM_CLASSES]")
        for i, stream_key in enumerate(self.input_prediction_stream_keys)
        }
    # Mask streams: identical definition, different stream keys.
    for mask_key in self.input_mask_stream_keys:
        input_defs[mask_key] = DataDefinition([-1], [torch.Tensor], "Batch of masks [BATCH_SIZE]")
    return input_defs
def output_data_definitions(self):
    """
    Defines all streams produced by the component: sample indices, images and
    their metadata, questions, answers and target categories.

    :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    output_defs = {}
    output_defs[self.key_indices] = DataDefinition(
        [-1, 1], [list, int],
        "Batch of sample indices [BATCH_SIZE] x [1]")
    output_defs[self.key_images] = DataDefinition(
        [-1, self.depth, self.height, self.width], [torch.Tensor],
        "Batch of images [BATCH_SIZE x IMAGE_DEPTH x IMAGE_HEIGHT x IMAGE_WIDTH]")
    output_defs[self.key_image_ids] = DataDefinition(
        [-1, 1], [list, str],
        "Batch of image names, each being a single word [BATCH_SIZE] x [STRING]")
    output_defs[self.key_image_sizes] = DataDefinition(
        [-1, 2], [torch.Tensor],
        "Batch of original sizes (height, width) of images [BATCH_SIZE x 2]")
    output_defs[self.key_questions] = DataDefinition(
        [-1, 1], [list, str],
        "Batch of questions, each being a string consisting of many words [BATCH_SIZE] x [STRING]")
    output_defs[self.key_answers] = DataDefinition(
        [-1, 1], [list, str],
        "Batch of target answers, each being a string consisting of many words [BATCH_SIZE] x [STRING]")
    output_defs[self.key_category_ids] = DataDefinition(
        [-1], [torch.Tensor],
        "Batch of target category indices, each being a single index [BATCH_SIZE]")
    output_defs[self.key_category_names] = DataDefinition(
        [-1, 1], [list, str],
        "Batch of category target names, each being a single word [BATCH_SIZE] x [STRING]")
    return output_defs
def output_data_definitions(self):
    """
    Function returns a dictionary with definitions of output data produced by the component.
    The returned streams depend on the prediction mode and on whether the final
    hidden state is exposed as an output.

    :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    d = {}
    # Output: predictions stream.
    if self.prediction_mode == "Dense":
        d[self.key_predictions] = DataDefinition([-1, -1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as sequence of probability distributions over classes [BATCH_SIZE x SEQ_LEN x PREDICTION_SIZE]")
    elif self.prediction_mode == "Last":
        # Only the last prediction of each sequence is returned.
        d[self.key_predictions] = DataDefinition([-1, self.prediction_size], [torch.Tensor], "Batch of predictions, each represented as a single probability distribution over classes [BATCH_SIZE x PREDICTION_SIZE]")
    # Else: no predictions.

    # Output: hidden state stream.
    if self.output_last_state:
        if self.cell_type == "LSTM":
            # LSTM exposes two states (h0/c0) - hence the extra dimension of size 2.
            # Fixed descriptions below: removed spurious "SEQ_LEN" not present in the declared dims.
            d[self.key_output_state] = DataDefinition([-1, 2, self.num_layers, self.hidden_size], [torch.Tensor], "Batch of LSTM final hidden states (h0/c0) [BATCH_SIZE x 2 x NUM_LAYERS x HIDDEN_SIZE]")
        else:
            d[self.key_output_state] = DataDefinition([-1, self.num_layers, self.hidden_size], [torch.Tensor], "Batch of RNN final hidden states [BATCH_SIZE x NUM_LAYERS x HIDDEN_SIZE]")
    return d
def test_handshake_input_definitions_dimensions(self):
    """ Tests handshaking of input definition dimensions. """
    all_defs = {}
    all_defs["input2"] = DataDefinition([-1, -1, -1], [list, list, str], "comment2")
    all_defs["input3"] = DataDefinition([-1, -1], [float], "comment3")

    # One input with wrong number of dimensions.
    # Expect exactly one handshake error.
    all_defs["input1"] = DataDefinition([-1, 10], [list, int], "comment")
    self.assertEqual(
        self.component.handshake_input_definitions(all_defs, log_errors=False), 1)

    # One input with wrong number of dimensions, but with dynamic size provided while we expect fixed.
    # Still expect one handshake error.
    all_defs["input1"] = DataDefinition([-1, -1], [list, int], "comment")
    self.assertEqual(
        self.component.handshake_input_definitions(all_defs, log_errors=False), 1)

    # One input with wrong number of dimensions, but it is the "agnostic" one.
    # Agnostic dimensions (size 1) are accepted - expect no errors.
    all_defs["input1"] = DataDefinition([1, 1], [list, int], "comment")
    self.assertEqual(
        self.component.handshake_input_definitions(all_defs, log_errors=False), 0)
def output_data_definitions(self):
    """
    Defines the single output stream produced by the component: feature maps
    coming out of the last convolution/max-pooling stage.

    :return: dictionary containing output data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    # Output shape follows the last conv layer's channels and the spatial size
    # after the third max-pooling.
    feature_dims = [
        -1,
        self.out_channels_conv3,
        self.height_features_maxpool3,
        self.width_features_maxpool3,
        ]
    feature_maps_def = DataDefinition(
        feature_dims, [torch.Tensor],
        "Batch of filter maps [BATCH_SIZE x FEAT_DEPTH x FEAT_HEIGHT x FEAT_WIDTH]")
    return {self.key_feature_maps: feature_maps_def}
def input_data_definitions(self):
    """
    Defines the single input stream required by the component, with a
    description generated from the configured number of dimensions.

    :return: dictionary containing input data definitions (each of type :py:class:`ptp.utils.DataDefinition`).
    """
    num_leading = self.num_inputs_dims - 1
    # Build the human-readable description, e.g. "Batch of outputs [DIM 0 x DIM 1]".
    # NOTE(review): the text says "outputs" although this is an input definition -
    # looks like a copy-paste; kept as-is to preserve behavior, worth confirming.
    desc = "Batch of outputs [" + " x ".join(
        "DIM {}".format(i) for i in range(num_leading)) + "]"
    return {
        self.key_inputs: DataDefinition(
            [-1] * num_leading + [self.input_size], [torch.Tensor], desc)
        }