Example #1
0
    # NOTE(review): fragment of a training script — `args`, `params`, `path_words`,
    # `path_train_sentences`, `load_dataset_from_text`, `input_fn`, and `model_fn`
    # are defined outside this view.
    path_eval_sentences = os.path.join(args.data_dir, 'dev.pkl')

    # Load Vocabularies
    # Build a word -> integer-id lookup table from the vocabulary file; any
    # out-of-vocabulary token is hashed into 1 extra OOV bucket.
    # (tf.contrib.lookup is a TensorFlow 1.x API.)
    words = tf.contrib.lookup.index_table_from_file(path_words,
                                                    num_oov_buckets=1)

    # Create the input data pipeline
    logging.info("Creating the datasets...")
    train_sentences = load_dataset_from_text(path_train_sentences)

    eval_sentences = load_dataset_from_text(path_eval_sentences)

    # Specify other parameters for the dataset and the model
    params.eval_size = params.dev_size
    params.buffer_size = params.train_size  # buffer size for shuffling
    # Integer id of the padding token, looked up through the vocabulary table
    # (used later to pad batches to a common length).
    params.id_pad_word = words.lookup(tf.constant(params.pad_word))

    # Create the two iterators over the two datasets
    train_inputs = input_fn('train', train_sentences, words, params)
    eval_inputs = input_fn('eval', eval_sentences, words, params)
    logging.info("- done.")

    # Define the models (2 different set of nodes that share weights for train and eval)
    # reuse=True makes the eval graph share variables with the train graph.
    logging.info("Creating the model...")
    train_model_spec = model_fn('train', train_inputs, params)
    eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)
    logging.info("- done.")

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
Example #2
0
    # NOTE(review): fragment of a training script — `args`, `params`,
    # `path_train`, `path_dev`, `path_corpus`, `load_signal`, `input_fn`,
    # `model_fn`, and `train_and_evaluate` are defined outside this view.
    logging.info("Loading pretrained Word2Vec...")
    # Load pretrained embeddings in the binary word2vec format.
    word2vec = gensim.models.KeyedVectors.load_word2vec_format(
        args.word2vec_file, binary=True)
    logging.info("- done.")

    logging.info("Building datasets...")
    # One ticker/symbol per line in symbols.txt.
    with open(os.path.join(args.data_dir, "symbols.txt")) as f_symbols:
        symbols = [line.strip() for line in f_symbols]
    # NOTE(review): only the first two symbols are used ([:2]) — presumably a
    # debugging/smoke-test limit; confirm before running a full training job.
    train_signal_map = {
        symbol: load_signal(path_train, path_corpus, symbol)
        for symbol in symbols[:2]
    }
    dev_signal_map = {
        symbol: load_signal(path_dev, path_corpus, symbol)
        for symbol in symbols[:2]
    }
    params.eval_size = params.dev_size
    params.buffer_size = params.train_size  # shuffle buffer size
    train_inputs = input_fn("train", train_signal_map, word2vec, params)
    eval_inputs = input_fn("eval", dev_signal_map, word2vec, params)
    logging.info("- done.")

    logging.info("Creating the model...")
    # reuse=True makes the eval graph share variables with the train graph.
    train_model_spec = model_fn("train", train_inputs, params)
    eval_model_spec = model_fn("eval", eval_inputs, params, reuse=True)
    logging.info("- done.")

    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir,
                       params, args.restore_dir)