Example 1
def load_data():
    """Load the dev, eval and train datasets described by the global config."""
    from rnn import load_data
    dev_data, _ = load_data(config,
                            0,
                            'dev',
                            chunking=config.value("chunking", ""),
                            seq_ordering="sorted",
                            shuffle_frames_of_nseqs=0)
    eval_data, _ = load_data(config,
                             0,
                             'eval',
                             chunking=config.value("chunking", ""),
                             seq_ordering="sorted",
                             shuffle_frames_of_nseqs=0)
    train_data, _ = load_data(config, 0, 'train')
    return dev_data, eval_data, train_data
Example 2
def create_engine():
    dev_data, eval_data, train_data = load_data()

    # Build the engine from the global config and the loaded datasets,
    # then set up the first training epoch.
    engine = Engine()
    engine.init_train_from_config(config, train_data, dev_data, eval_data)
    engine.init_train_epoch()

    # Pre-generate this epoch's batches from the training data.
    engine.train_batches = engine.train_data.generate_batches(
        recurrent_net=engine.network.recurrent,
        batch_size=engine.batch_size,
        max_seqs=engine.max_seqs,
        max_seq_length=engine.max_seq_length,
        seq_drop=engine.seq_drop,
        shuffle_batches=engine.shuffle_batches,
        used_data_keys=engine.network.used_data_keys)

    # Initialise the learning rate and optimizer variables in the TF session.
    engine.updater.set_learning_rate(engine.learning_rate, session=engine.tf_session)
    engine.updater.init_optimizer_vars(session=engine.tf_session)
    return engine
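
A minimal usage sketch (assuming the global config has been loaded, as in Example 1); the helper hands back the fully initialised engine:

# Sketch only: the returned engine has its network built, the first epoch's
# batches in engine.train_batches, and the optimizer variables initialised.
engine = create_engine()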
Example 3
# Flask view; `request`, `render_template`, `torch`, the local `rnn` module and the
# data/model path constants (WINE_DATA, JOKE_DATA, WINES, JOKES, WINE, JOKE) are
# assumed to be imported/defined elsewhere in the application.
def hello_world():
    """Main index page of the site.

    :return: html_template rendered with the generated text, if any
    """

    text = ""
    if request.method == "POST":

        # Retrieve form data
        model = request.form.get("model_type")
        history = bool(request.form.get("history"))
        seed_text = request.form.get("seed-text")
        if seed_text == "":
            seed_text = "<"

        # Load the data
        datapath = WINE_DATA if model == "wine" else JOKE_DATA
        seq, itoc, ctoi = rnn.load_data(datapath)

        if history:
            # Generate text from the same model at different points in training
            filepaths = WINES if model == "wine" else JOKES
            models = []
            for filepath in filepaths:
                models.append(torch.load(filepath))
            for model in models:
                text += model.generate_text(ctoi,
                                            itoc,
                                            seq.shape[1],
                                            start_phrase=seed_text)
                text += "\n\n"
        else:
            # Generate text from the best model of the given type
            path = WINE if model == "wine" else JOKE
            model = torch.load(path)
            text += model.generate_text(ctoi,
                                        itoc,
                                        seq.shape[1],
                                        start_phrase=seed_text)

    return render_template("main.html", text=text)
Example 4
import torch
from sklearn.model_selection import train_test_split

import rnn

# Model hyperparameters and data paths
NUM_LAYERS = 1
EMB_SIZE = 128
HID_SIZE = 256
PAD_CHAR = '#'
JOKES = "jokes/jokes.txt"
WINE = "wine_data/wine_reviews_short.txt"

dev = 'cpu'
if torch.cuda.is_available():
    dev = 'cuda'

# Set this to JOKES to train on jokes, or to WINE to train on wine reviews
filename = JOKES

# Load the character sequences and the index<->character vocabulary mappings
seq, itoc, ctoi = rnn.load_data(filename)

if filename == JOKES:
    X_train, X_test = train_test_split(seq, test_size=0.25, shuffle=False)
else:
    # Use a much smaller subset of the wine data to keep training time down
    X_train, X_test = train_test_split(seq, test_size=0.8, shuffle=False)
    X_test, _ = train_test_split(X_test, test_size=0.98, shuffle=False)

charnn = rnn.CharNN(len(itoc),
                    num_layers=NUM_LAYERS,
                    emb_size=EMB_SIZE,
                    hid_size=HID_SIZE,
                    pad_idx=ctoi[PAD_CHAR],
                    max_len=seq.shape[1],
                    device=dev)
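
The snippet above only constructs the network; a rough follow-up sketch (assuming charnn has already been trained, and reusing the generate_text call shown in Example 3) for sampling text:

# Sketch only: assumes the model has been trained; the generate_text arguments
# (ctoi, itoc, maximum sequence length, start phrase) mirror Example 3.
sample = charnn.generate_text(ctoi, itoc, seq.shape[1], start_phrase="<")
print(sample)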