Exemplo n.º 1
0
    def __init__(self, name, config):
        """
        Sets up the sentence indexer: parent classes, stream keys and mode flags.

        :param name: Component name (read from configuration file).
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructors go first, Component before WordMappings.
        Component.__init__(self, name, SentenceIndexer, config)
        WordMappings.__init__(self)

        # Names of the input/output streams.
        streams = self.stream_keys
        self.key_inputs = streams["inputs"]
        self.key_outputs = streams["outputs"]

        settings = self.config

        # Reverse-mode flag.
        self.mode_reverse = settings['reverse']

        # Fixed-length padding setting.
        self.fixed_padding = settings['fixed_padding']

        # Whether to add <EOS> at the end of a sequence.
        self.enable_eos_token = settings['eos_token']

        # The reverse mode needs the inverse (index:word) mapping.
        if self.mode_reverse:
            self.ix_to_word = {
                index: word for word, index in self.word_to_ix.items()
            }

        # Flag selecting between input distributions and indices.
        self.use_input_distributions = settings["use_input_distributions"]
Exemplo n.º 2
0
    def __init__(self, name, config):
        """
        Sets up the accuracy statistics component: stream keys, flags and the statistics key.

        :param name: Name of the component.
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructor first.
        Component.__init__(self, name, AccuracyStatistics, config)

        # Names of the streams this component works with.
        streams = self.stream_keys
        self.key_targets = streams["targets"]
        self.key_predictions = streams["predictions"]
        self.key_masks = streams["masks"]

        # Flags read from the configuration: prediction distributions/indices and masking.
        settings = self.config
        self.use_prediction_distributions = settings["use_prediction_distributions"]
        self.use_masking = settings["use_masking"]

        # Name under which the accuracy statistic is stored.
        self.key_accuracy = self.statistics_keys["accuracy"]
    def __init__(self, name, config):
        """
        Initializes object. Loads keys and values of variables and adds them to globals.

        ``keys`` can be a list of strings or a single string with comma-separated names.
        ``values`` can be a list (one value per key) or a single value, which is then
        published under a single key.

        :param name: Name of the component.
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        :raises ConfigurationError: When the number of provided keys does not match the \
            number of provided values.

        """
        # Call constructors of parent classes.
        Component.__init__(self, name, GlobalVariablePublisher, config)

        mismatch_message = "Number of parameters indicated by provided 'keys' must be equal to number of provided 'values'"

        # Get list of keys of global variables - can be both list of strings or a single string with comma-separated values.
        keys = self.config["keys"]
        if isinstance(keys, str):
            keys = keys.replace(" ", "").split(",")
        # Get list of values - must be a single value or a list.
        values = self.config["values"]

        if isinstance(values, list):
            # Make sure that both are lists of the same length.
            if not isinstance(keys, list) or len(keys) != len(values):
                raise ConfigurationError(mismatch_message)

            # Publish globals one by one.
            for key, value in zip(keys, values):
                self.globals[key] = value
            return

        # Single value. At this point ``keys`` is a list, so compare its entries (not the
        # list itself) against the empty string: "" splits into [''] and must publish nothing.
        named_keys = [key for key in keys if key != '']
        if len(named_keys) > 1:
            # One value cannot be assigned to several keys - report instead of dropping keys.
            raise ConfigurationError(mismatch_message)
        if named_keys:
            # Publish single global.
            self.globals[named_keys[0]] = values
    def __init__(self, name, config):
        """
        Sets up the sentence tokenizer and selects the processing direction.

        :param name: Component name (read from configuration file).
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructor first.
        Component.__init__(self, name, SentenceTokenizer, config)

        # Direction flag, taken straight from the passed configuration.
        self.mode_detokenize = config['detokenize']

        # Whitespace-based tokenizer.
        self.tokenizer = WhitespaceTokenizer()

        # Names of the input/output streams.
        streams = self.stream_keys
        self.key_inputs = streams["inputs"]
        self.key_outputs = streams["outputs"]

        # detokenize: list of strings -> sentence; otherwise: sentence -> list of strings.
        self.processor = (
            self.detokenize_sample if self.mode_detokenize else self.tokenize_sample
        )
Exemplo n.º 5
0
    def __init__(self, name, class_type, config):
        """
        Initializes a Model object.

        This constructor:

        - calls the ``Component`` and ``Module`` constructors,

        - marks the model as not frozen:

            >>> self.frozen = False

        :param name: Model name.
        :type name: str

        :param class_type: Class type of the component.

        :param config: Parameters read from configuration file.
        :type config: ``ptp.configuration.ConfigInterface``

        """
        # Parent constructors: Component first, then Module.
        Component.__init__(self, name, class_type, config)
        Module.__init__(self)

        # A freshly created model starts out unfrozen.
        self.frozen = False
    def __init__(self, name, config):
        """
        Sets up the precision/recall statistics component: stream and statistics keys,
        class labels with their index mappings, and display options.

        :param name: Name of the component.
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructor first.
        Component.__init__(self, name, PrecisionRecallStatistics, config)

        # Names of the streams this component works with.
        self.key_targets = self.stream_keys["targets"]
        self.key_predictions = self.stream_keys["predictions"]
        self.key_masks = self.stream_keys["masks"]

        # Flags: prediction distributions/indices and masking.
        self.use_prediction_distributions = self.config["use_prediction_distributions"]
        self.use_masking = self.config["use_masking"]

        # Names under which the statistics are stored.
        self.key_precision = self.statistics_keys["precision"]
        self.key_recall = self.statistics_keys["recall"]
        self.key_f1score = self.statistics_keys["f1score"]

        if self.config["use_word_mappings"]:
            # Labels are the words of the mapping, in its iteration order; every
            # word index is mapped onto the position of its word in that order.
            word_mappings = self.globals["word_mappings"]
            self.labels = list(word_mappings.keys())
            self.index_mappings = {
                index: position
                for position, index in enumerate(word_mappings.values())
            }
            # The number of classes follows from the labels.
            self.num_classes = len(self.labels)
        else:
            # No vocabulary: labels are 0..num_classes-1 and the mapping is the identity.
            self.num_classes = self.globals["num_classes"]
            class_ids = range(self.num_classes)
            self.labels = list(class_ids)
            self.index_mappings = dict(zip(class_ids, class_ids))

        # Display options.
        self.show_confusion_matrix = self.config["show_confusion_matrix"]
        self.show_class_scores = self.config["show_class_scores"]
    def __init__(self, name, config):
        """
        Initializes the object. Loads keys, word mappings and vocabularies.

        The ``input_prediction_streams``, ``input_mask_streams`` and ``input_word_mappings``
        parameters can each be given as a list or as a single string with comma-separated names.

        :param name: Name of the component read from the configuration file
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Call constructors of parent classes.
        Component.__init__(self, name, JoinMaskedPredictions, config)

        def as_key_list(value):
            # A comma-separated string is split into names (spaces removed);
            # any other value is passed through unchanged.
            if isinstance(value, str):
                return value.replace(" ", "").split(",")
            return value

        # Get input key mappings.
        # Load list of prediction streams names (keys).
        self.input_prediction_stream_keys = as_key_list(
            self.config["input_prediction_streams"])

        # Load list of mask streams names (keys).
        self.input_mask_stream_keys = as_key_list(
            self.config["input_mask_streams"])

        # Load list of word mappings names (keys).
        input_word_mappings_keys = as_key_list(
            self.config["input_word_mappings"])

        # Retrieve input word mappings from globals and invert each of them
        # into an (index:word) dictionary.
        self.input_ix_to_word = [
            {index: word for (word, index) in self.globals[wmk].items()}
            for wmk in input_word_mappings_keys
        ]

        # Get output key mappings.
        self.key_output_indices = self.stream_keys["output_indices"]
        self.key_output_strings = self.stream_keys["output_strings"]

        # Retrieve output word mappings from globals.
        self.output_word_to_ix = self.globals["output_word_mappings"]
Exemplo n.º 8
0
    def __init__(self, name, config):
        """
        Sets up the batch size statistics component.

        :param name: Name of the component.
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructor first.
        Component.__init__(self, name, BatchSizeStatistics, config)

        # Name of the stream with indices.
        streams = self.stream_keys
        self.key_indices = streams["indices"]
Exemplo n.º 9
0
    def __init__(self, name, config):
        """
        Sets up the sentence tokenizer: preprocessing options, tokenizer, stream keys
        and the processing direction.

        :param name: Component name (read from configuration file).
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructor first.
        Component.__init__(self, name, SentenceTokenizer, config)

        # Direction flag, taken straight from the passed configuration.
        self.mode_detokenize = config['detokenize']

        # Preprocessing steps, restricted to the listed options.
        allowed_steps = 'none | lowercase | remove_punctuation | all'.split(" | ")
        self.preprocessing = get_value_list_from_dictionary(
            "preprocessing", self.config, allowed_steps)
        if 'none' in self.preprocessing:
            # 'none' wins: no preprocessing at all (also when 'all' is listed).
            self.preprocessing = []
        elif 'all' in self.preprocessing:
            # 'all' expands to every actual preprocessing step.
            self.preprocessing = 'lowercase | remove_punctuation'.split(" | ")
        self.logger.info("Applied preprocessing: {}".format(self.preprocessing))

        # Extra characters to remove.
        self.remove_characters = get_value_list_from_dictionary("remove_characters", self.config)
        self.logger.info("Additional characters that will be removed during preprocessing: {}".format(self.remove_characters))

        # The translation table deleting punctuation exists only when that step is enabled.
        if 'remove_punctuation' in self.preprocessing:
            self.translator = str.maketrans('', '', string.punctuation)

        # Whitespace-based tokenizer.
        self.tokenizer = nltk.tokenize.WhitespaceTokenizer()

        # Names of the input/output streams.
        streams = self.stream_keys
        self.key_inputs = streams["inputs"]
        self.key_outputs = streams["outputs"]

        # detokenize: list of strings -> sentence; otherwise: sentence -> list of strings.
        self.processor = (
            self.detokenize_sample if self.mode_detokenize else self.tokenize_sample
        )
    def __init__(self, name, config):
        """
        Sets up the sentence one-hot encoder: parent classes and stream keys.

        :param name: Component name (read from configuration file).
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructors go first, Component before WordMappings.
        Component.__init__(self, name, SentenceOneHotEncoder, config)
        WordMappings.__init__(self)

        # Names of the input/output streams.
        streams = self.stream_keys
        self.key_inputs = streams["inputs"]
        self.key_outputs = streams["outputs"]
Exemplo n.º 11
0
    def __init__(self, name, config):
        """
        Sets up the string-to-mask component. Loads stream keys and word mappings.

        :param name: Name of the component.
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructor first.
        Component.__init__(self, name, StringToMask, config)

        # Names of the streams with strings and masks.
        streams = self.stream_keys
        self.key_strings = streams["strings"]
        self.key_masks = streams["masks"]

        # (word:index) mappings come from the globals.
        self.word_to_ix = self.globals["word_mappings"]
Exemplo n.º 12
0
    def __init__(self, name, class_type, config):
        """
        Sets up the loss object and loads its stream keys.

        :param name: Loss name.
        :type name: str

        :param class_type: Class type of the component.

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.utils.ConfigInterface`

        """
        # Parent constructor first.
        Component.__init__(self, name, class_type, config)

        # Names of the streams with targets, predictions and the loss.
        streams = self.stream_keys
        self.key_targets = streams["targets"]
        self.key_predictions = streams["predictions"]
        self.key_loss = streams["loss"]
Exemplo n.º 13
0
    def __init__(self, name, config):
        """
        Sets up the bag-of-words encoder: stream keys and the bow size taken from globals.

        :param name: Component name (read from configuration file).
        :type name: str

        :param config: Dictionary of parameters (read from configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructor first.
        Component.__init__(self, name, BOWEncoder, config)

        # Names of the input/output streams.
        streams = self.stream_keys
        self.key_inputs = streams["inputs"]
        self.key_outputs = streams["outputs"]

        # The bow size is a global variable.
        self.bow_size = self.globals["bow_size"]
Exemplo n.º 14
0
    def __init__(self, name, config):
        """
        Sets up the word decoder: parent classes, reverse mapping and stream keys.

        :param name: Component name (read from configuration file).
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructors go first, Component before WordMappings.
        Component.__init__(self, name, WordDecoder, config)
        WordMappings.__init__(self)

        # Inverse (index:word) mapping, built once for faster processing.
        self.ix_to_word = {
            index: word for word, index in self.word_to_ix.items()
        }

        # Names of the input/output streams.
        streams = self.stream_keys
        self.key_inputs = streams["inputs"]
        self.key_outputs = streams["outputs"]
Exemplo n.º 15
0
    def __init__(self, name, config):
        """
        Sets up the tensor reduction component: stream keys, dimensions and the
        reduction function selected in the configuration.

        :param name: Name of the component loaded from the configuration file.
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructor first.
        Component.__init__(self, name, ReduceTensor, config)

        # Names of the input/output streams.
        self.key_inputs = self.stream_keys["inputs"]
        self.key_outputs = self.stream_keys["outputs"]

        # Number of input dimensions (configuration) and size of a single
        # input item, i.e. the last dimension (globals).
        self.num_inputs_dims = self.config["num_inputs_dims"]
        self.input_size = self.globals["input_size"]

        # Reduction parameters from the configuration.
        self.dim = self.config["reduction_dim"]
        self.keepdim = self.config["keepdim"]

        # Name of the reduction, restricted to the supported options.
        supported_names = 'sum | mean | min | max | argmin | argmax'.split(" | ")
        rt = get_value_from_dictionary(
            "reduction_type", self.config, supported_names)

        # Dispatch table: reduction name -> torch function.
        reductions_by_name = {
            "sum": torch.sum,
            "mean": torch.mean,
            "min": torch.min,
            "max": torch.max,
            "argmin": torch.argmin,
            "argmax": torch.argmax,
        }
        self.reduction = reductions_by_name[rt]
Exemplo n.º 16
0
    def __init__(self, name, config):
        """
        Sets up the precision/recall statistics component: stream and statistics keys,
        number of classes, labels and display options.

        :param name: Name of the component.
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructor first.
        Component.__init__(self, name, PrecisionRecallStatistics, config)

        # Names of the streams with targets and predictions.
        streams = self.stream_keys
        self.key_targets = streams["targets"]
        self.key_predictions = streams["predictions"]

        # Names under which the statistics are stored.
        statistics = self.statistics_keys
        self.key_precision = statistics["precision"]
        self.key_recall = statistics["recall"]
        self.key_f1score = statistics["f1score"]

        # The number of possible outputs is a global variable.
        self.num_classes = self.globals["num_classes"]

        if self.config["use_word_mappings"]:
            # Labels are the words of the mapping, in its iteration order
            # (assumed to be ordered, starting from 0).
            self.labels = list(self.globals["word_mappings"].keys())
        else:
            # No vocabulary: labels are simply 0..num_classes-1.
            self.labels = list(range(self.num_classes))

        # Display options.
        self.show_confusion_matrix = self.config["show_confusion_matrix"]
        self.show_class_scores = self.config["show_class_scores"]
Exemplo n.º 17
0
    def __init__(self, name, config):
        """
        Sets up the image viewer: stream keys and the sample number.

        :param name: Name of the component.
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructor first.
        Component.__init__(self, name, ImageViewer, config)

        # Names of the streams this component works with.
        streams = self.stream_keys
        self.key_indices = streams["indices"]
        self.key_images = streams["images"]
        self.key_labels = streams["labels"]
        self.key_answers = streams["answers"]

        # Sample number from the configuration.
        self.sample_number = self.config["sample_number"]
Exemplo n.º 18
0
    def __init__(self, name, config):
        """
        Sets up the BLEU statistics component: stream keys, configuration options,
        reverse word mapping and the statistics key.

        :param name: Name of the component.
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructor first.
        Component.__init__(self, name, BLEUStatistics, config)

        # Names of the streams this component works with.
        streams = self.stream_keys
        self.key_targets = streams["targets"]
        self.key_predictions = streams["predictions"]
        self.key_masks = streams["masks"]

        # Flag selecting between prediction distributions and indices.
        self.use_prediction_distributions = self.config["use_prediction_distributions"]

        # Ignored words from the configuration.
        self.ignored_words = self.config["ignored_words"]

        # Inverse (index:word) mapping built from the word mappings stored in globals.
        word_to_ix = self.globals["word_mappings"]
        self.ix_to_word = {index: word for word, index in word_to_ix.items()}

        # Weights from the configuration.
        self.weights = self.config["weights"]

        # Name under which the BLEU statistic is stored.
        self.key_bleu = self.statistics_keys["bleu"]
Exemplo n.º 19
0
    def __init__(self, name, class_type, config):
        """
        Initializes task object:
            - calls base class constructors.
            - sets key_indices variable (used for storing indices of samples)

                >>> self.key_indices = self.stream_keys["indices"]

            - sets empty curriculum learning configuration

                >>> self.curriculum_config = {}

        :param name: Task name.
        :type name: str

        :param class_type: Class type of the component.

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        .. note::

            It is likely to encounter a case where the model needs a parameter value only known when the task has been
            instantiated, like the size of a vocabulary set or the number of marker bits.

            The user can pass those values in this app_state. All objects will be able to access it later:

                >>> self.app_state["new_global_value"] = 1 # Sets global value.
                >>> val = self.app_state["new_global_value"] # Gets global value.
        """
        # Parent constructors: Component first, then Dataset.
        Component.__init__(self, name, class_type, config)
        Dataset.__init__(self)

        # Name of the stream with sample indices.
        streams = self.stream_keys
        self.key_indices = streams["indices"]

        # Curriculum learning starts out with an empty configuration.
        self.curriculum_config = {}
Exemplo n.º 20
0
    def __init__(self, name, config):
        """
        Sets up the stream viewer: indices key, list of input streams and the sample number.

        :param name: Name of the component.
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructor first.
        Component.__init__(self, name, StreamViewer, config)

        # Name of the stream with indices.
        streams = self.stream_keys
        self.key_indices = streams["indices"]

        # Names (keys) of the input streams, read as a list from the configuration.
        self.input_stream_keys = get_value_list_from_dictionary(
            "input_streams", self.config)

        # Sample number from the configuration.
        self.sample_number = self.config["sample_number"]
Exemplo n.º 21
0
    def __init__(self, name, config):
        """
        Sets up the exporter: retrieves names of input streams and opens the output
        file in the log directory of the application state.

        :param name: Name of the component.
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructor first.
        Component.__init__(self, name, StreamFileExporter, config)

        # Name of the stream with indices.
        self.key_indices = self.stream_keys["indices"]

        # Names (keys) of the exported streams, read as a list from the configuration.
        self.input_stream_keys = get_value_list_from_dictionary("input_streams", self.config)

        # Separator placed between exported values.
        self.separator = self.config["separator"]

        # Open the output file for writing; the handle is kept on the object
        # and is not closed in this constructor.
        abs_filename = path.join(self.app_state.log_dir, self.config["filename"])
        self.file = open(abs_filename, 'w')

        # Optional first line announcing the separator.
        if self.config["export_separator_line_to_csv"]:
            separator_line = "sep={}\n".format(self.separator)
            self.file.write(separator_line)

        # Header export flag - the header is meant to be written once, with the first batch.
        self.export_header = self.config["export_header_to_csv"]

        message = "Writing values from {} streams to {}".format(
            self.input_stream_keys, abs_filename)
        self.logger.info(message)
Exemplo n.º 22
0
    def __init__(self, name, config):
        """
        Sets up the list-to-tensor component: stream keys and input dimensions.

        :param name: Name of the component loaded from the configuration file.
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructor first.
        Component.__init__(self, name, ListToTensor, config)

        # Names of the input/output streams.
        streams = self.stream_keys
        self.key_inputs = streams["inputs"]
        self.key_outputs = streams["outputs"]

        # Number of input dimensions comes from the configuration.
        self.num_inputs_dims = self.config["num_inputs_dims"]

        # Size of a single input item (last dimension) comes from the globals.
        self.input_size = self.globals["input_size"]
Exemplo n.º 23
0
    def __init__(self, name, config):
        """
        Sets up the tensor reshaping component: stream keys, input/output shapes
        and the ``output_size`` global.

        :param name: Name of the component.
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructor first.
        Component.__init__(self, name, ReshapeTensor, config)

        # Names of the input/output streams.
        streams = self.stream_keys
        self.key_inputs = streams["inputs"]
        self.key_outputs = streams["outputs"]

        # Input and output shapes from the configuration, converted to ints.
        self.input_dims = list(map(int, self.config["input_dims"]))
        self.output_dims = list(map(int, self.config["output_dims"]))

        # Publish the output size: all dimensions except the first (batch) one.
        self.globals["output_size"] = self.output_dims[1:]
Exemplo n.º 24
0
    def __init__(self, name, config):
        """
        Initializes object. Loads stream keys, input/output shapes and the
        concatenation dimension, and publishes the ``output_size`` global.

        The ``input_streams`` parameter can be a list or a single string with
        comma-separated names.

        :param name: Name of the component.
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Call constructors of parent classes.
        Component.__init__(self, name, ConcatenateTensor, config)

        # Get key mappings.
        self.key_outputs = self.stream_keys["outputs"]

        # Load list of streams names (keys).
        self.input_stream_keys = self.config["input_streams"]
        if isinstance(self.input_stream_keys, str):
            # A comma-separated string is split into names (spaces removed).
            self.input_stream_keys = self.input_stream_keys.replace(
                " ", "").split(",")

        # Get input shapes from configuration.
        # Assuming that it will be list of lists.
        self.input_stream_dims = [[int(x) for x in dims]
                                  for dims in self.config["input_dims"]]

        # Get output shape from configuration.
        self.output_dims = [int(x) for x in self.config["output_dims"]]

        # Get concatenation dimension.
        self.dim = self.config["dim"]

        # Set global variable - all dimensions ASIDE OF BATCH.
        self.globals["output_size"] = self.output_dims[1:]
Exemplo n.º 25
0
 def __init__(self, name, config):
     """
     Forwards the name and configuration to the ``Component`` constructor.

     :param name: Name of the component.

     :param config: Configuration passed on to ``Component``.
     """
     # No class type is passed to the parent constructor.
     class_type = None
     Component.__init__(self, name, class_type, config)
Exemplo n.º 26
0
    def __init__(self, name, class_type, config):
        """
        Builds the (word:index) mappings.

        Depending on the configuration the mappings are imported from globals, loaded
        from a csv file or generated from the source vocabulary files (and then saved).
        Additional tokens are appended afterwards and the result is optionally
        exported to globals.

        :param name: Component name (read from configuration file).
        :type name: str

        :param class_type: Class type of the component (derived from this class).

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Parent constructor first.
        Component.__init__(self, name, class_type, config)

        # Folder with the data (with the user directory expanded).
        self.data_folder = os.path.expanduser(self.config['data_folder'])

        # Source vocabulary files and the file with resulting (indexed) mappings.
        self.source_vocabulary_files = self.config['source_vocabulary_files']
        self.word_mappings_file = self.config['word_mappings_file']

        # Path of the word mappings file inside the data folder.
        expanded_folder = os.path.expanduser(self.data_folder)
        mappings_path = os.path.join(expanded_folder, self.word_mappings_file)

        if self.config["import_word_mappings_from_globals"]:
            # Import: reuse the mappings already stored in globals.
            self.word_to_ix = self.globals["word_mappings"]
            assert (
                len(self.word_to_ix) > 0
            ), "The word mappings imported from global variables are empty!"

        elif (self.word_mappings_file != ""
              and os.path.exists(mappings_path)
              and not self.config['regenerate']):
            # Load: the mappings file exists and regeneration was not requested.
            self.word_to_ix = wm.load_word_mappings_from_csv_file(
                self.logger, self.data_folder, self.word_mappings_file)
            assert (
                len(self.word_to_ix) > 0
            ), "The word mappings loaded from file are empty!"

        else:
            # Generate: build new mappings from the source vocabulary files...
            self.word_to_ix = wm.generate_word_mappings_from_source_files(
                self.logger, self.data_folder, self.source_vocabulary_files)
            assert (
                len(self.word_to_ix) > 0
            ), "The word mappings generated from sources are empty!"
            # ... and save them, so next time they can simply be loaded.
            wm.save_word_mappings_to_csv_file(
                self.logger, self.data_folder, self.word_mappings_file,
                self.word_to_ix)

        # Append additional tokens; each new one gets the next free index.
        self.additional_tokens = self.config["additional_tokens"].split(',')
        for token in self.additional_tokens:
            # Skip empty entries and tokens that are already mapped.
            if token == '' or token in self.word_to_ix:
                continue
            self.word_to_ix[token] = len(self.word_to_ix)

        vocabulary_size_message = "Initialized word mappings with vocabulary of size {}".format(
            len(self.word_to_ix))
        self.logger.info(vocabulary_size_message)

        # Optionally export the mappings and the vocabulary size to globals.
        if self.config["export_word_mappings_to_globals"]:
            self.globals["word_mappings"] = self.word_to_ix
            self.globals["vocabulary_size"] = len(self.word_to_ix)
Exemplo n.º 27
0
 def __init__(self):
     """
     Initializes the component under the fixed name ``MockupComponent`` with a
     freshly created ``ConfigInterface``.
     """
     # Fresh configuration object created just for this component.
     fresh_config = ConfigInterface()
     Component.__init__(self, "MockupComponent", None, fresh_config)