Exemplo n.º 1
0
    def save(self):
        """Persist this vocabulary's three JSON artifacts.

        Writes the token counts, token->index map and index->token map
        to data/vocabularies/, named after ``log_name`` + ``suffix``,
        then logs completion.
        """
        name_parts = (self.log_name, self.suffix)

        # Pair each payload with the file it is written to; order matches
        # the original save sequence (counts, tok2idx, idx2tok).
        outputs = (
            (self.unique_tokens_and_counts,
             j("data", "vocabularies", "%s%s_voc.json" % name_parts)),
            (self.token_to_index,
             j("data", "vocabularies", "%s%s_tok2idx.json" % name_parts)),
            (self.index_to_token,
             j("data", "vocabularies", "%s%s_idx2tok.json" % name_parts)),
        )
        for payload, path in outputs:
            h.save_to_json(payload, path)

        logger.info("Done saving vocabulary '%s%s'" % name_parts)
Exemplo n.º 2
0
        tokenized_loglines[i] = [
            w if w in word_to_index else g.unknown_token for w in logline
        ]

    # assign token ids
    PAD_ID = word_to_index[g.pad_token]
    UNK_ID = word_to_index[g.unknown_token]
    BOS_ID = word_to_index[g.logline_start_token]
    EOS_ID = word_to_index[g.logline_end_token]

    if not PAD_ID == 0:
        raise (
            "Padding ID has to be 0, because tensorflow says so and all masking / padding algorithms depend on it"
        )

    save_to_json(vocabulary, g.VOCABULARY_FILE)
    save_to_json(tokenized_loglines, g.TOKENIZED_LOGLINES_FILE)
    save_to_json(index_to_word, g.INDEX_TO_WORD_FILE)
    save_to_json(word_to_index, g.WORD_TO_INDEX_FILE)
else:
    loglines = list(open(g.datafile, 'r'))
    word_to_index = load_from_json(g.WORD_TO_INDEX_FILE)
    index_to_word = load_from_json(g.INDEX_TO_WORD_FILE)
    vocabulary = load_from_json(g.VOCABULARY_FILE)
    vocabulary_size = len(vocabulary)
    tokenized_loglines = load_from_json(g.TOKENIZED_LOGLINES_FILE)
    word_frequencies = nltk.FreqDist(itertools.chain(*tokenized_loglines))
    PAD_ID = word_to_index[g.pad_token]
    UNK_ID = word_to_index[g.unknown_token]
    BOS_ID = word_to_index[g.logline_start_token]
    EOS_ID = word_to_index[g.logline_end_token]
Exemplo n.º 3
0
    # Remaining model/training hyperparameters; the dict literal opens
    # before this chunk and the values are presumably defined earlier
    # in the script — TODO confirm against the full file.
    "num_lstm_layers": num_lstm_layers,
    "learning_rate_decay_factor": learning_rate_decay_factor,
    "batch_size": batch_size,
    "dtype": str(DTYPE),  # stringified so the dict stays JSON-serializable
    "num_samples": num_samples,
    "max_gradient_norm": max_gradient_norm,
    "learning_rate": LEARNING_RATE,
    "epochs": epochs,
    "num_examples": num_examples,
    "max_steps": max_steps,
    "examples_in_hierarchy": examples_in_hierarchy,
    # NOTE(review): "threshhold" is misspelled, but it is a runtime dict key
    # written to the JSON file — renaming it would break any consumer of the
    # saved hyperparameters, so it is left as-is.
    "color_threshhold": color_threshhold,
    "h_threshold": h_threshold,
}

# Persist the hyperparameters so this run's configuration can be reproduced.
save_to_json(hyperparams, HYPERPARAMETERS_FILE)
"""#  2 Graph

## Learning Rate
"""

# https://www.tensorflow.org/versions/master/api_docs/python/tf/train/exponential_decay

# Step-wise (staircase=True) exponential decay: the rate is multiplied by
# learning_rate_decay_factor every adjust_learning_rate_after_steps steps.
learning_rate = tf.train.exponential_decay(
    learning_rate=LEARNING_RATE,
    global_step=tf_global_step,  # current learning step
    decay_steps=
    adjust_learning_rate_after_steps,  # how many steps to train after decaying learning rate
    decay_rate=learning_rate_decay_factor,
    staircase=True)
"""## 2.1 Input, Output and Target of the graph"""