Example #1
File: train_seq.py — Project: Rain-Y/QGExp
def func(save_dir=r'../outputs',
         vocab=r'../outputs/vocabulary.txt',
         train=r'../outputs/train-data.npz',
         valid=r'../outputs/valid-data.npz',
         bidirectional=1,
         batch_size=32,
         num_epochs=1,
         learning_rate=0.001,
         dropout_keep=1.0,
         interval=1000,
         lstm_units=500,
         embedding_size=100,
         in_embeddings=None,
         in_train_embeddings=False,
         clip_norm=5.0):
    """Train a conditioned seq2seq model and save checkpoints to *save_dir*.

    Loads a vocabulary and binary (.npz) train/validation data, builds a
    ``seq.seq2seqModel``, and runs its training loop inside a TF session.

    Args:
        save_dir: Directory where the trained model is written.
        vocab: Path to the vocabulary file.
        train: Path to the training data (.npz).
        valid: Path to the validation data (.npz).
        bidirectional: Whether the encoder is bidirectional (passed through).
        batch_size: Minibatch size for training.
        num_epochs: Number of passes over the training data.
        learning_rate: Optimizer learning rate.
        dropout_keep: Dropout keep probability (1.0 disables dropout).
        interval: Report progress every this many batches.
        lstm_units: LSTM hidden-state size.
        embedding_size: Dimensionality of generated embeddings; ignored when
            ``in_embeddings`` is supplied.
        in_embeddings: Optional pre-trained embeddings source; when ``None``,
            random embeddings are created.
        in_train_embeddings: Whether supplied embeddings are fine-tuned.
        clip_norm: Gradient-clipping norm passed to ``model.train``
            (previously a hard-coded 5.0).
    """
    logging.basicConfig(level=logging.INFO)

    wd = utils.WordDictionary(vocab)
    embeddings = load_or_create_embeddings(in_embeddings, wd.vocabulary_size,
                                           embedding_size)

    logging.info('Reading training data')
    train_data = utils.load_binary_data(train)
    logging.info('Reading validation data')
    valid_data = utils.load_binary_data(valid)
    logging.info('Creating model')

    # Randomly-initialized embeddings are always trained; pre-trained ones
    # are fine-tuned only when the caller asks for it.
    train_embeddings = in_train_embeddings if in_embeddings else True
    model = seq.seq2seqModel(lstm_units,
                             embeddings,
                             wd.eos_index,
                             train_embeddings=train_embeddings,
                             bidirectional=bidirectional,
                             condition=True)

    # Run the session as a context manager so it is closed even if
    # training raises (the original leaked the session).
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        show_parameter_count(model.get_trainable_variables())
        logging.info('Initialized the model and all variables. Starting training.')
        model.train(sess,
                    save_dir,
                    train_data,
                    valid_data,
                    batch_size,
                    num_epochs,
                    learning_rate,
                    dropout_keep,
                    clip_norm,
                    report_interval=interval)
                        default=2)
    parser.add_argument('--embeddings',
                        help='Numpy embeddings file. If not supplied, '
                        'random embeddings are generated.')
    parser.add_argument('data', help='data directory name')
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    path = args.data + '/vocabulary.txt'
    wd = utils.WordDictionary(path)
    embeddings = load_or_create_embeddings(args.embeddings, wd.vocabulary_size,
                                           args.embedding_size)

    logging.info('Reading training data')
    path = args.data + '/train-data.npz'
    train_data = utils.load_binary_data(path)
    logging.info('Reading validation data')
    path = args.data + '/valid-data.npz'
    valid_data = utils.load_binary_data(path)
    logging.info('Creating model')

    model = autoencoder.TextAutoencoder(args.lstm_units, embeddings,
                                        wd.eos_index, args.num_gpus)
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.allocator_type = 'BFC'
    sess = tf.InteractiveSession(graph=model.g, config=config)
    sess.run(tf.global_variables_initializer())
    model.g.finalize()
    show_parameter_count(model.get_trainable_variables())
    logging.info('Initialized the model and all variables. Starting training.')
                        help='Numpy embeddings file. If not supplied, '
                        'random embeddings are generated.')
    parser.add_argument('vocab', help='Vocabulary file')
    parser.add_argument('train', help='Training set')
    parser.add_argument('valid', help='Validation set')
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    sess = tf.Session()
    wd = utils.WordDictionary(args.vocab)
    embeddings = load_or_create_embeddings(args.embeddings, wd.vocabulary_size,
                                           args.embedding_size)

    logging.info('Reading training data')
    train_data = utils.load_binary_data(args.train)
    logging.info('Reading validation data')
    valid_data = utils.load_binary_data(args.valid)
    logging.info('Creating model')

    train_embeddings = args.train_embeddings if args.embeddings else True
    model = autoencoder.TextAutoencoder(args.lstm_units,
                                        embeddings,
                                        wd.eos_index,
                                        train_embeddings=train_embeddings,
                                        bidirectional=args.bidirectional)

    sess.run(tf.global_variables_initializer())
    show_parameter_count(model.get_trainable_variables())
    logging.info('Initialized the model and all variables. Starting training.')
    model.train(sess,