Example #1
def validate_sentence(session, model, validation_batch, encoder_state, current_step):
    encoder_inputs, single_decoder_inputs, decoder_weights = validation_batch.next()
    print(BatchGenerator.batches2string(encoder_inputs))
    print(BatchGenerator.batches2string(single_decoder_inputs))
    # replicate to full batch size so we get multiple results against the whole state
    encoder_inputs = [np.repeat(x, BATCH_SIZE, axis=0) for x in encoder_inputs]
    decoder_inputs = [np.repeat(x, BATCH_SIZE, axis=0) for x in single_decoder_inputs]
    decoder_weights = [np.repeat(x, BATCH_SIZE, axis=0) for x in decoder_weights]
    # _, eval_loss, prediction = model.step(sess, current_step - 1, encoder_inputs, decoder_inputs,
    #                                      decoder_weights, enc_state[-1:], 1.0, True)
    _, eval_loss, prediction = model.step(session, current_step - 1, encoder_inputs, decoder_inputs,
                                          decoder_weights, encoder_state, 1.0, True)
    # split into 'no of batches' list then average across batches
    reshaped = np.reshape(prediction, (prediction.shape[0] // BATCH_SIZE, BATCH_SIZE, prediction.shape[1]))
    averaged = np.mean(reshaped, axis=1)
    # now roll as in case of single batch
    rolled = np.rollaxis(np.asarray([averaged]), 1, 0)
    splitted = np.vsplit(rolled, rolled.shape[0])
    squeezed = [np.squeeze(e,0) for e in splitted]
    print(BatchGenerator.batches2string(squeezed))
    # compute character to character perplexity
    val_perp = float(np.exp(BatchGenerator.logprob(np.concatenate(squeezed),
                                                   np.concatenate(single_decoder_inputs[1:]))))
    print('--validation perp.: %.2f' % val_perp)
    return val_perp
Example #2
def main(_):
  """
  The model specified by the command-line arg --model_dir is applied to every
  data point in --test_datafile, and the model output is sent to --output. The
  unix command 'paste' can be used to stitch the input file and output together.
  e.g.,
  $ classifiy_data.py --config=train.conf --test_datafile=test.dat > output.dat
  $ paste -d ' ' test.dat output.dat > input_and_output.dat
  """
  configs.DEFINE_string('test_datafile',None,'file with test data')
  configs.DEFINE_string('time_field','date','fields used for dates/time')
  configs.DEFINE_string('print_start','190001','only print data on or after')
  configs.DEFINE_string('print_end','999912','only print data on or before')
  configs.DEFINE_integer('num_batches',None,'num_batches override')

  config = configs.get_configs()

  if config.test_datafile is None:
     config.test_datafile = config.datafile

  batch_size = 1
  data_path = model_utils.get_data_path(config.data_dir,config.test_datafile)

  # print("Loading data %s"%data_path)

  dataset = BatchGenerator(data_path, config,
                             batch_size=batch_size,
                             num_unrollings=config.num_unrollings)

  num_data_points = dataset.num_batches
  if config.num_batches is not None:
     num_data_points = config.num_batches

  #print("num_batches = ", num_data_points)

  tf_config = tf.ConfigProto( allow_soft_placement=True,
                                log_device_placement=False )

  with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

    #print("Loading model.")

    model = model_utils.get_trained_model(session, config, verbose=False)

    for i in range(num_data_points):
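      # score one data point per iteration; results are printed only for dates within [print_start, print_end]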

      batch = dataset.next_batch()
      preds = model.step(session, batch)
      seq_len = get_seq_length(batch)
      key, date = get_key_and_date(batch, seq_len-1)

      if (date < config.print_start or date > config.print_end):
        continue

      score  = get_score(config, preds, seq_len-1)
      target = get_target(config, batch, seq_len-1)

      print("%s %s %.6f %.6f %d" % (key, date, score, target, seq_len))
Example #3
def main():
    restore_model = args.restore
    print(restore_model)
    seq_len = args.seq_len
    batch_size = args.batch_size
    num_epoch = args.epochs
    batches_per_epoch = 1000

    batch_generator = BatchGenerator(batch_size, seq_len)
    g, vs = create_graph(batch_generator.num_letters,
                         batch_size,
                         num_units=args.units,
                         lstm_layers=args.lstm_layers,
                         window_mixtures=args.window_mixtures,
                         output_mixtures=args.output_mixtures)

    with tf.Session(graph=g) as sess:
        model_saver = tf.train.Saver(max_to_keep=2)
        if restore_model:
            model_file = tf.train.latest_checkpoint(
                os.path.join(restore_model, 'models'))
            experiment_path = restore_model
            epoch = int(model_file.split('-')[-1]) + 1
            model_saver.restore(sess, model_file)
        else:
            sess.run(tf.global_variables_initializer())
            experiment_path = next_experiment_path()
            epoch = 0

        summary_writer = tf.summary.FileWriter(experiment_path,
                                               graph=g,
                                               flush_secs=10)
        summary_writer.add_session_log(
            tf.SessionLog(status=tf.SessionLog.START),
            global_step=epoch * batches_per_epoch)

        for e in range(epoch, num_epoch):
            print('\nEpoch {}'.format(e))
            for b in range(1, batches_per_epoch + 1):
                coords, seq, reset, needed = batch_generator.next_batch()
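                # reset the recurrent state for sequences that the generator reports as finished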
                if needed:
                    sess.run(vs.reset_states, feed_dict={vs.reset: reset})
                l, s, _ = sess.run([vs.loss, vs.summary, vs.train_step],
                                   feed_dict={
                                       vs.coordinates: coords,
                                       vs.sequence: seq
                                   })
                summary_writer.add_summary(s,
                                           global_step=e * batches_per_epoch +
                                           b)
                print('\r[{:5d}/{:5d}] loss = {}'.format(
                    b, batches_per_epoch, l),
                      end='')

            model_saver.save(sess,
                             os.path.join(experiment_path, 'models', 'model'),
                             global_step=e)
Example #4
def load_train_valid_data(config, verbose):
    """
    Returns train_data and valid_data, both as BatchGenerator objects.
    """
    data_path = data_utils.get_data_path(config.data_dir, config.datafile)
    batches = BatchGenerator(data_path, config, verbose=verbose)
    train_data = batches.train_batches()
    valid_data = batches.valid_batches()
    return train_data, valid_data
Example #5
    def build_dataset(self):
        subjects_videos = self.get_subjects_videos()
        train_videos, val_videos = self.train_val_split(subjects_videos)

        train_aligns = self.generate_align_hash(train_videos)
        val_aligns = self.generate_align_hash(val_videos)

        self.train_generator = BatchGenerator(train_videos, train_aligns,
                                              self.batch_size)
        self.val_generator = BatchGenerator(val_videos, val_aligns,
                                            self.batch_size)
Example #6
def predict(config):

    datafile = config.datafile

    if config.predict_datafile is not None:
        datafile = config.predict_datafile

    print("Loading data from %s ..." % datafile)
    path = utils.data_utils.get_data_path(config.data_dir, datafile)

    config.batch_size = 1
    batches = BatchGenerator(path,
                             config,
                             require_targets=config.require_targets,
                             verbose=True)
    batches.cache(verbose=True)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        model = model_utils.get_model(session, config, verbose=True)

        perfs = dict()

        for i in range(batches.num_batches):
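            # score every batch; per-date MSEs are only accumulated when the loss is finite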
            batch = batches.next_batch()

            (mse, preds) = model.step(session, batch)
            # (mse, preds) = model.debug_step(session, batch)

            if not math.isnan(mse):
                date = batch_to_date(batch)
                if date not in perfs:
                    perfs[date] = list()
                perfs[date].append(mse)

            if config.pretty_print_preds:
                pretty_print_predictions(batches, batch, preds, mse)
            else:
                print_predictions(batches, batch, preds)

        if config.mse_outfile is not None:
            with open(config.mse_outfile, "w") as f:
                for date in sorted(perfs):
                    mean = np.mean(perfs[date])
                    print("%s %.6f %d" % (date, mean, len(perfs[date])),
                          file=f)
                total_mean = np.mean([x for v in perfs.values() for x in v])
                print("Total %.6f" % (total_mean), file=f)
        else:
            exit()
Example #7
def create_generator(data_dict, batch_params):
    batch_gen_dict = {i: None for i in ['train', 'validation', 'test']}
    batch_gen_dict['validation'] = BatchGenerator(data_dict['validation'],
                                                  cut_start=False,
                                                  **batch_params)
    batch_gen_dict['train'] = BatchGenerator(data_dict['train'],
                                             cut_start=True,
                                             **batch_params)
    batch_gen_dict['test'] = BatchGenerator(data_dict['test'],
                                            cut_start=True,
                                            **batch_params)
    return batch_gen_dict
Example #8
def semantic_inference():
    """
    Semantic segmentation inference main process
    :return:
    """
    config = PanopticConfig()
    if not os.path.exists(config.dump_root_dir):
        os.makedirs(config.dump_root_dir)

    batch_generator = BatchGenerator(config=config)
    session_config = tf.compat.v1.ConfigProto(allow_soft_placement=True,
                                              log_device_placement=True)

    with tf.compat.v1.Session(config=session_config) as sess:
        model = PanopticSemanticSegModel(config=config)

        tf.compat.v1.global_variables_initializer().run()
        saver = tf.compat.v1.train.Saver(max_to_keep=None)
        saver.restore(
            sess,
            os.path.join(
                config.dump_model_para_root_dir,
                'epoch_13_train_loss_0.214400_val_loss_0.210000.ckpt'))

        # global inference phase
        batch_index = 0
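        # run inference batch by batch until the generator reports no more valid data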
        while True:
            image_batch, gt_batch, semantic_gt_batch, imgs_names_batch, valid = \
                batch_generator.next_batch_inference()
            if not valid:
                break

            val_batch_loss, semantic_seg_probs = sess.run(
                fetches=[
                    model.loss,
                    model.semantic_seg_probs,
                ],
                feed_dict={
                    model.input_data: image_batch,
                    model.ground_truth: gt_batch
                })

            batch_index += 1

            print('[Global Inference] Batch {}: loss = {}.'.format(
                batch_index, np.round(val_batch_loss, 4)))

            # dump into local disk
            dump_to_local_disk(config.dump_root_dir, imgs_names_batch,
                               image_batch, semantic_gt_batch,
                               semantic_seg_probs)

        sess.close()
Example #9
def main(args):
    mode = args.mode
    overwrite_flag = args.overwrite

    model_name = 'trajgru'
    data_folder = 'data'
    hurricane_path = os.path.join(data_folder, 'ibtracs.NA.list.v04r00.csv')
    results_folder = 'results'

    config_obj = Config(model_name)
    data = DataCreator(hurricane_path, **config_obj.data_params)
    hurricane_list, weather_list = data.hurricane_list, data.weather_list

    if mode == 'train':
        print("Starting experiments")
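        # train one model per configuration in the experiment list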
        for exp_count, conf in enumerate(config_obj.conf_list):
            print('\nExperiment {}'.format(exp_count))
            print('-*-' * 10)

            batch_generator = BatchGenerator(hurricane_list=hurricane_list,
                                             weather_list=weather_list,
                                             batch_size=conf["batch_size"],
                                             window_len=conf["window_len"],
                                             phase_shift=conf["phase_shift"],
                                             return_mode=conf['return_mode'],
                                             cut_start=conf['cut_start'],
                                             vector_mode=conf['vector_mode'],
                                             vector_freq=conf['vector_freq'],
                                             **config_obj.experiment_params)

            train(model_name, batch_generator, exp_count, overwrite_flag,
                  **conf)

    elif mode == 'test':
        best_model, best_conf, trainer = select_best_model(results_folder)

        batch_generator = BatchGenerator(hurricane_list=hurricane_list,
                                         weather_list=weather_list,
                                         batch_size=best_conf["batch_size"],
                                         window_len=best_conf["window_len"],
                                         phase_shift=best_conf["phase_shift"],
                                         return_mode=best_conf['return_mode'],
                                         cut_start=best_conf['cut_start'],
                                         vector_mode=best_conf['vector_mode'],
                                         vector_freq=best_conf['vector_freq'],
                                         **config_obj.experiment_params)

        print("Testing with best model...")
        predict(best_model, batch_generator, trainer)

    else:
        raise ValueError('mode {} is not recognized'.format(mode))
Example #10
def main(mode):
    print('Loading data...')
    data = LoadData(dataset_path='dataset', images_path='dataset/images/')

    print('Creating Batch Generator...')
    batch_creator = BatchGenerator(data_dict=data.data_dict,
                                   captions_int=data.captions_int,
                                   image_addr=data.image_addr,
                                   **batch_params)

    if mode == 'train':
        print('Creating Models...')
        caption_model = CaptionLSTM(model_params=model_params,
                                    int2word=data.int2word)

        print('Starting training...')
        class_weights = calc_class_weights(data.captions_int.values)
        train(caption_model, batch_creator, class_weights, **train_params)

    elif mode == 'sample':
        print('Loading model...')
        model_file = open('vgg_lstm.pkl', 'rb')
        model = pickle.load(model_file)
        print('Creating sample...')
        sample(model, batch_creator, top_k=10, seq_len=16, show_image=True)

    elif mode == 'test':
        print('Loading model')
        model_file = open('vgg_lstm.pkl', 'rb')
        model = pickle.load(model_file)
        print('Testing model...')
        test(model, batch_creator, top_k=10, seq_len=16)
Example #11
def main(train_file, test_file, job_dir, session):
    y_train, y_validation = load_data(train_file)
    y_train = np.array([j[1:] for j in y_train])
    y_validation = np.array([j[1:] for j in y_validation])

    epochs = 10
    batch_size = 64

    #input_dir=job_dir+'data/train'
    training_gen = BatchGenerator(input_dir=job_dir+'data/train',
                                  y=y_train,
                                  epochs=epochs,
                                  batch_size=batch_size,
                                  session=session)
    validation_gen = BatchSequence(input_dir=job_dir+'data/validation',
                                    y=y_validation,
                                    batch_size=batch_size,
                                    session=session)

    model = create_model()

    #model.fit_generator(generator=training_gen,
#                        steps_per_epoch=int(len(y_train)/batch_size),
#                        epochs=epochs,
#                        validation_data=validation_gen,
#                        validation_steps=int(len(y_validation)/batch_size))

    for i in range(epochs):
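        # train on one generated batch at a time (replaces the commented-out fit_generator call above)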
        for batch_x, batch_y in training_gen:
            model.fit(batch_x, batch_y)

    model.save(job_dir + 'models/vgg16.h5')
Example #12
def train(in_vocabulary, in_embeddings, in_config, resume=False):
    logging.info('Training the model')
    model = create_model(in_vocabulary, in_vocabulary, in_embeddings,
                         in_config['buckets'][BUCKET_ID][0],
                         in_config['buckets'][BUCKET_ID][1], in_config)
    MODEL_FILE = in_config['model_weights']
    if resume:
        model.load_weights(MODEL_FILE)

    encoder_input_file = path.join(in_config['data_folder'],
                                   'train_encoder_{}.npy'.format(BUCKET_ID))
    decoder_input_file = path.join(in_config['data_folder'],
                                   'train_decoder_{}.npy'.format(BUCKET_ID))
    train_batch_generator = BatchGenerator(encoder_input_file,
                                           decoder_input_file,
                                           in_config['batch_size'],
                                           in_vocabulary)
    save_callback = ModelCheckpoint(MODEL_FILE,
                                    monitor='val_loss',
                                    verbose=1,
                                    save_best_only=False,
                                    save_weights_only=True,
                                    mode='auto')
    # demo_callback = DecodingDemo(in_vocabulary, in_embeddings, BUCKET_ID, in_config, np.load(encoder_input_file)[:10])
    model.fit_generator(
        generate_sequences(train_batch_generator),
        nb_epoch=in_config['nb_epoch'],
        samples_per_epoch=in_config['samples_per_epoch'],
        callbacks=[save_callback]  # , demo_callback]
    )
    evaluate(in_vocabulary, in_embeddings, in_config)
Example #13
def load_all_data(config):
    """
    Returns all data as a BatchGenerator object.
    """
    data_path = get_data_path(config.data_dir, config.datafile)
    batches = BatchGenerator(data_path, config)

    return batches
Example #14
def load_all_data(config, is_training_only=False):
    """
    Returns all data as a BatchGenerator object.
    """
    data_path = get_data_path(config.data_dir, config.datafile)
    batches = BatchGenerator(data_path, config, is_training_only=is_training_only)
    
    return batches
Example #15
class TestBaseGenerator(TestCase):
    """Class testing a batch generator"""
    def setUp(self):
        self.batch_gen = BatchGenerator(DATA_PATH, VAL_DATA_PATH, 2)
        self.batch_gen.load_data()

    def test_check_init(self):
        self.assertEqual(self.batch_gen.data_dir, DATA_PATH)
        self.assertEqual(self.batch_gen.batch_size, 2)

    def test_no_data_path(self):
        with self.assertRaises(NoDataPath):
            self.batch_gen.data_dir = None

        with self.assertRaises(NoDataPath):
            self.batch_gen.data_dir = ''

    def test_load_files_names(self):
        f_names = np.array(
            [['test_data/000000117764.jpg', 'test_data/000000117764_mask.jpg'],
             ['test_data/000000117857.jpg', 'test_data/000000117857_mask.jpg'],
             ['test_data/000000118061.jpg',
              'test_data/000000118061_mask.jpg']])
        np.testing.assert_equal(self.batch_gen.images_pairs, f_names)

    def test_num_batches(self):
        self.assertEqual(self.batch_gen.num_batches, 2)

    def test_loaded_dataset_shape(self):
        x_shape = (2, 256, 256, 3)
        y_shape = (2, 256, 256, 3)
        x, y = next(self.batch_gen.train_batches)
        self.assertEqual(x.shape, x_shape)
        self.assertEqual(y.shape, y_shape)
        self.assertEqual(y.max(), 1)
        self.assertEqual(y.min(), 0)

    def test_generate_test_batch(self):
        batch_shape = (1, 256, 256, 3)
        train_batch, val_batch = self.batch_gen.generate_test_batch(
            batch_size=1)
        self.assertEqual(train_batch[0].shape, batch_shape)
        self.assertEqual(train_batch[1].shape, batch_shape)
        self.assertEqual(val_batch[0].shape, batch_shape)
        self.assertEqual(val_batch[1].shape, batch_shape)
Example #16
 def __init__(self, pieces, batch_size, num_unrolling):
     self._batch_generator = list()
     batch_size_each = batch_size // len(
         pieces)  # TODO exactly fix batch_size
     if batch_size_each == 0:
         batch_size_each = 1
     for p in pieces:
         self._batch_generator.append(
             BatchGenerator(p, batch_size_each, num_unrolling))
Example #17
def run_test():
    # get data directory if specified
    if len(sys.argv) == 2:
        data_path = sys.argv[1]
    else:
        data_path = "./data"

    train_path = join(data_path, "train")
    val_path = join(data_path, "val")

    ### precomputed from training set
    mean = -12.
    std = 3.0

    model = ResNetModel(120)
    train = BatchGenerator(train_path, 8, mean, std)
    val = BatchGenerator(val_path, 8, mean, std)

    model.fit(train, val, steps_per_epoch=iterations_per_epoch)
Example #18
def test_sequence_generator():
    MODEL_FILE = path.join(WORKING_DIR, 'model.h5')
    vocab, rev_vocab, embeddings, enc_train_ids_path, dec_train_ids_path, enc_dev_ids_path, dec_dev_ids_path = prepare_data(
    )
    train_batch_generator = BatchGenerator(enc_train_ids_path,
                                           dec_train_ids_path, 1, len(vocab),
                                           BUCKETS[1])
    for _, batch in zip(xrange(1000000),
                        generate_sequences(train_batch_generator)):
        assert batch is not None
Example #19
def run_test():
    if len(sys.argv) != 2:
        print("Usage: model_linear_norm.py data_directory")
        sys.exit(0)

    data_path = sys.argv[1]

    train_path = join(data_path, "train")
    val_path = join(data_path, "val")

    ### precomputed from training set
    mean = -12.
    std = 3.0

    model = ModelLinearNorm(120)
    train = BatchGenerator(train_path, 64, mean, std)
    val = BatchGenerator(val_path, 64, mean, std)

    model.fit(train, val)
Example #20
def main():
    seq_len = 256
    batch_size = 64
    epochs = 30
    batches_per_epoch = 1000

    batch_generator = BatchGenerator(batch_size, seq_len)
    g, vs = create_graph(batch_generator.num_letters, batch_size)

    with tf.Session(graph=g) as sess:
        model_saver = tf.train.Saver(max_to_keep=2)
        sess.run(tf.global_variables_initializer())
        model_path = get_model_path()

        summary_writer = tf.summary.FileWriter(model_path,
                                               graph=g,
                                               flush_secs=10)
        summary_writer.add_session_log(
            tf.SessionLog(status=tf.SessionLog.START), global_step=0)
        for e in range(epochs):
            print('\n{} : Epoch {}'.format(datetime.datetime.now().time(), e))
            for b in range(1, batches_per_epoch + 1):
                coordinates, labels, reset, to_reset = batch_generator.next_batch(
                )
                if to_reset:
                    sess.run(vs.reset_states, feed_dict={vs.reset: reset})
                loss, s, _ = sess.run([vs.loss, vs.summary, vs.train_step],
                                      feed_dict={
                                          vs.coordinates: coordinates,
                                          vs.sequence: labels
                                      })
                summary_writer.add_summary(s,
                                           global_step=e * batches_per_epoch +
                                           b)
                print('\r[{:5d}/{:5d}] loss = {}'.format(
                    b, batches_per_epoch, loss),
                      end='')

            model_saver.save(sess,
                             os.path.join(model_path, 'models', 'model'),
                             global_step=e)
Example #21
def evaluate(in_vocabulary, in_embeddings, in_config):
    logging.info('Evaluating the trained model')

    model = create_model(in_vocabulary,
                         in_vocabulary,
                         in_embeddings,
                         in_config['buckets'][BUCKET_ID][0],
                         in_config['buckets'][BUCKET_ID][1],
                         in_config,
                         mode=Mode.TEST)
    MODEL_FILE = in_config['model_weights']
    model.load_weights(MODEL_FILE)

    encoder_input_file = path.join(in_config['data_folder'],
                                   'test_encoder_{}.npy'.format(BUCKET_ID))
    decoder_input_file = path.join(in_config['data_folder'],
                                   'test_decoder_{}.npy'.format(BUCKET_ID))
    test_batch_generator = BatchGenerator(encoder_input_file,
                                          decoder_input_file,
                                          in_config['batch_size'],
                                          in_vocabulary)
    print model.evaluate_generator(generate_sequences(test_batch_generator),
                                   test_batch_generator.get_dataset_size())
Example #22
    def train(self, use_generator=True, resume_train=False):

        batch_generator = BatchGenerator(self.w2v_reader, self.t2v_reader,
                                         self.tagger)
        self.batch_generator = batch_generator

        dropout = ns.DROPOUT
        reg_alpha = ns.REG_ALPHA
        units = ns.UNITS
        layers = ns.LAYERS

        print(
            '>> Compiling model... dropout = {}, reg_alpha = {}, units = {}, layers = {}'
            .format(dropout, reg_alpha, units, layers))

        self.compile(dropout=dropout,
                     reg_alpha=reg_alpha,
                     units=units,
                     layers=layers)

        # ner_model.print_summary()

        if use_generator:
            batch_size = ns.BATCH_SIZE_GENERATOR
            nb_epoch = ns.NB_EPOCH_GENERATOR
            max_q_size = ns.MAX_Q_SIZE
            nb_worker = ns.NB_WORKER
            pickle_safe = ns.PICKLE_SAFE

            print(
                '>> Training model... (using fit_generator) - epochs = {}, batch_size = {}'
                .format(nb_epoch, batch_size))
            self.train_on_generator(resume_train=resume_train,
                                    nb_epoch=nb_epoch,
                                    samples_per_epoch=batch_size,
                                    max_q_size=max_q_size,
                                    nb_worker=nb_worker,
                                    pickle_safe=pickle_safe)
        else:
            batch_size = ns.BATCH_SIZE
            nb_epoch = ns.NB_EPOCH
            save_every_nb_iterations = ns.SAVE_EVERY_NB_ITERATIONS

            print(
                '>> Training model... (using fit) -  epochs = {}, batch_size = {}'
                .format(nb_epoch, batch_size))
            self.train_on_batches(
                batch_size=batch_size,
                nb_epoch=nb_epoch,
                save_every_nb_iterations=save_every_nb_iterations)
Example #23
def main():
    # load midi
    dirpath = '../'
    pieces = loadPieces(dirpath)

    # divide train valid
    valid_pieces = pieces[:num_valid]
    train_pieces = pieces[num_valid:]
    valid_gen = BatchGenerator(valid_pieces[0], valid_batch_size, 1)
    train_gen = MixedGenarator(train_pieces, batch_size, num_unrolling)

    # create model and start training
    model = LSTM_model(layer_size, batch_size, num_unrolling)
    model.train(train_gen, valid_gen, train_step=10000, summary_frequency=100)
Example #24
def train_srgan(args):    
    np.random.seed(args.random_seed)
    
    hr_size = tuple(args.hr_size) + (3,)
    files = np.array(glob.glob(args.images_dir))
    files  = files[np.random.permutation(files.shape[0])]
    if files.shape[0]==0:
        raise FileNotFoundError("Training images not found")

    if args.log_images > 0:
        # pick a few images to track during training and remove them from the training set
        images_idx = np.random.randint(0, files.shape[0], args.log_images)
        tracking_files = files[images_idx]
        files = np.setdiff1d(files, tracking_files)

        tracking_images_lr = []
        tracking_images_hr = []
        for i, f in enumerate(tracking_files):
            os.mkdir("images/%d" % i)
            img = Image.open(f)
            img.save("images/%d/high_resolution.png"%i)
            tracking_images_hr.append(np.array(img)/127.5-1.)
            orig_size = img.size
            img = img.resize((img.size[0]//4,img.size[1]//4), Image.BICUBIC)
            img.resize(orig_size , Image.NONE).save("images/%d/low_resolution.png"%i)
            img.resize(orig_size , Image.BICUBIC).save("images/%d/bicubic.png"%i)
            tracking_images_lr.append(np.array(img)/127.5-1.)
    
    files = files[:args.max_images]
    
    batch_generator = BatchGenerator(files, args.batch_size, cache_size=10, image_size=hr_size[:2])
    from model import SRGAN
    srgan = SRGAN(hr_img_shape=hr_size, random_seed = args.random_seed)
    srgan.restore_models()
    
    for step in tqdm(range(args.training_steps), total=args.training_steps):
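        # rescale pixels from [0, 255] to [-1, 1], matching the img / 127.5 - 1 scaling used for the tracked images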
        batch_data = next(batch_generator)/255.*2.-1
        srgan.train_step_generator(batch_data, summary_step=step if step%args.steps_tensorboard==0 else None)
        srgan.train_step_discriminator(batch_data)
        #LOG IMAGES
        if args.log_images > 0 and step%args.steps_log==0:
            for i,f in enumerate(tracking_images_lr):
                hr_generated = srgan.increase_resolution(np.expand_dims(f,0))[0]
                hr_generated = ((hr_generated+1.)*127.5).astype(np.uint8)
                img = Image.fromarray(hr_generated)
                img.save("images/%d/generated_step_%d.png"%(i,step))
        if step%args.steps_checkpoint==0:
            srgan.save_models()
    srgan.save_models()
    if args.verbose > 0:
        print("Training done")
Example #25
def main():

    config = deep_quant.get_configs()

    data_path = data_utils.get_data_path(config.data_dir, config.datafile)

    print("Loading data ...")
    sys.stdout.flush()

    batches = BatchGenerator(data_path, config, require_targets=True)

    print("Num batches %d" % batches.num_batches)
    sys.stdout.flush()

    batching(batches)
Example #26
def main(train_file, test_file, job_dir):
    y_train, y_validation = load_data(train_file)
    print("test1")
    training_gen = BatchGenerator(input_dir=images_path_train,
                                  y=y_train,
                                  batch_size=32,
                                  shuffle=False,
                                  img_size=290)

    for batch_x, batch_y in training_gen:
        print(batch_x.shape)
        print(batch_y.shape)
        break

    print("success")
    print(y_train.shape, y_validation.shape)
Example #27
def train():
    graph = tf.Graph()
    inputs, labels, sequence_lengths, optimizer, loss, predictions, summary_op, saver = model(
        graph)
    train_writer = tf.train.SummaryWriter('tensorboard/train', graph)
    test_writer = tf.train.SummaryWriter('tensorboard/test', graph)

    with tf.Session(graph=graph) as sess:
        sess.run(tf.initialize_all_variables())
        print("Training...")
        train_batches = BatchGenerator(settings.BATCH_SIZE)
        test_batches = BatchGenerator(3000, settings.SEQUENCE_LIMIT)
        test_inputs, test_labels, test_sequence_lengths = test_batches.next()

        for step in xrange(settings.EPOCH):
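            # steps where step % 1000 == 1 evaluate on the fixed test batch; every other step trains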
            if step % 1000 != 1:
                train_inputs, train_labels, train_sequence_lengths = train_batches.next(
                )
                feed_dict = {
                    inputs: train_inputs,
                    labels: train_labels,
                    sequence_lengths: train_sequence_lengths
                }
                _, train_loss, train_predictions, summary = sess.run(
                    [optimizer, loss, predictions, summary_op],
                    feed_dict=feed_dict)
                train_writer.add_summary(summary, step)
                train_writer.flush()
            else:
                test_feed_dict = {
                    inputs: test_inputs,
                    labels: test_labels,
                    sequence_lengths: test_sequence_lengths
                }
                test_loss, summary = sess.run([loss, summary_op],
                                              feed_dict=test_feed_dict)
                test_writer.add_summary(summary, step)
                test_writer.flush()

            if step % 1000 == 0:
                print('-----------Step %d:-------------' % step)
                print('Training set:')
                print('  Loss       : ', train_loss)
                print('  Input      : ', train_inputs[0])
                print('  Label      : ', utils.onehot2sticker(train_labels))
                print('  Prediction : ',
                      utils.onehot2sticker(train_predictions))

            if step % 10000 == 0:
                # Save the variables to disk.
                save_path = saver.save(sess,
                                       "checkpoints/" + settings.CKPT_NAME)
                print("Model saved in file: %s" % save_path)
Example #28
def main(mode):
    dataset_path = 'data'
    data = LoadData(dataset_path=dataset_path, **data_params)

    print('Creating Batch Generator...')
    batch_gen = BatchGenerator(data_dict=data.data_dict,
                               label_dict=data.label_dict,
                               **batch_params)

    if mode == 'train':
        train(vocabs=[data.word2int, data.int2word],
              batch_gen=batch_gen,
              train_params=train_params,
              model_params=model_params)

    elif mode == 'test':
        print('Loading model')
        model_file = open('results/seq2seq.pkl', 'rb')
        model = pkl.load(model_file)
        print('Testing model...')
        test(model, data.int2word, batch_gen)
Example #29
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--experiment', action='store', required=False, type=int, default=None)
    parser.add_argument('--mode', required=False)  # Just for pycharm
    parser.add_argument('--port', required=False)  # Just for pycharm
    args = parser.parse_args()
    if args.experiment is not None:
        experiment = args.experiment
    else:
        experiment = 0

    negative_type, loss, testing_model = get_experiment(experiment)
    exp_name = f'xfin_{negative_type}_{testing_model}_{loss.__name__}'
    save_dir = os.path.join(os.getcwd(), exp_name)
    print('IDEXP: {}'.format(exp_name))
    os.makedirs(save_dir, exist_ok=True)

    # Training settings
    n_epochs = 5000  # Maximum number of training epochs
    patience_epochs = 50  # Stop training if results don't improve
    batch_size = 16
    model_name = 'triplet_matching.h5'
    learning_rate = 1e-4
    multi_gpu = True  # Train on multiple GPUs (set up for 2)
    batch_normalization = False  # Use BN, False for the results in the paper
    data_augmentation = True
    dropout = True

    # Data parameters
    acc_type = 'ACC1.000000'  # Waist: 'ACC0.000000', Wrist: 'ACC1.000000'. Only wrist is used in the paper
    data_type = 'acc_sil_100_95_clean'
    data_type_test = 'acc_sil_100_00_clean'
    subjects_train = range(1, 8)
    subjects_test = range(8, 11)
    negative_type_test = '50_DSDA-50_DSSA'

    # Only use moving activities
    activity_filter = ['walking', 'wiping', 'vacuuming', 'sweeping', 'exercising', 'stretching', 'cleaning']

    # Pre-training
    pre_training_model = False  # Used to resume previous training

    # Set random seeds
    seed(0)  # Python
    np.random.seed(0)  # Numpy
    set_random_seed(0)  # Tensorflow

    # Data paths
    data_path = os.path.join(r'/mnt/storage/scratch/user/calorie_reid', data_type)
    data_path_test = os.path.join(r'/mnt/storage/scratch/user/calorie_reid', data_type_test)
    max_queue_size = 8
    n_workers = 2
    load_zip_memory = True
    use_multiprocess = True

    # Define Keras models for potential multiple-gpu training
    if multi_gpu:
        with tf.device('/cpu:0'):
            model = triplet_network(testing_model, use_bn=batch_normalization, use_dropout=dropout)
            if pre_training_model:
                model.load_weights(os.path.join(save_dir, pre_training_model), by_name=True)

        parallel_model = multi_gpu_model(model, gpus=2)
        parallel_model.compile(optimizer=optimizers.Adam(lr=learning_rate),
                               loss=loss, metrics=[good_distance, bad_distance, triplet_acc])
    else:
        model = triplet_network(testing_model, use_bn=batch_normalization, use_dropout=dropout)
        model.compile(optimizer=optimizers.Adam(lr=learning_rate),
                      loss=loss, metrics=[good_distance, bad_distance, triplet_acc])

        if pre_training_model:
            model.load_weights(os.path.join(save_dir, pre_training_model), by_name=True)
        parallel_model = model

    # Define training callbacks
    # Checkpointer saves the best model based on the minimum validation loss
    checkpointer = MultiGpuCheckpoint(model, filepath=os.path.join(save_dir, model_name),
                                      verbose=1, save_best_only=True, monitor='val_loss', save_weights_only=True)
    # This callback takes care of plotting loss, matching distance, non-matching distance and triplet accuracy during
    # training
    plot_results = PlotResuls(loss_each=1, saveloss='plots', filepath=save_dir)

    # Define the data loaders, used for training (gen) and validation (validgen)
    gen = BatchGenerator(data_folder=data_path, batch_size=batch_size, acc_folder=acc_type, name='training',
                         negative_type=negative_type, load_zip_memory=load_zip_memory, subjects=subjects_train,
                         activity=activity_filter, acc_augmentation=False, vid_augmentation=data_augmentation, shuffle=True)
    validgen = BatchGenerator(data_folder=data_path_test, batch_size=batch_size, acc_folder=acc_type, name='validating',
                              negative_type=negative_type_test, load_zip_memory=load_zip_memory, subjects=subjects_test,
                              shuffle=True, activity=activity_filter, acc_augmentation=False, vid_augmentation=False)
    # Define additional callbacks that make use of the data generators
    # This callback updates the combination of triplets (negative samples) at the end of each epoch
    update_mapping = LambdaCallback(on_epoch_end=gen.generate_mapping)
    # This callback calculates and plots the progress of the area under the ROC curves
    auc_callback = AuROC(model, validgen, model_name, patience=patience_epochs, filepath=save_dir)

    validation_steps = len(validgen)
    parallel_model.fit_generator(gen, max_queue_size=max_queue_size, validation_data=validgen,
                                 validation_steps=validation_steps, epochs=n_epochs,
                                 use_multiprocessing=use_multiprocess, workers=n_workers,
                                 callbacks=[checkpointer, plot_results, update_mapping, auc_callback])
    # Save the final model (not the optimal model, but useful for debugging)
    parallel_model.save(os.path.join(save_dir, 'final_' + model_name))

    # Load the best model based on the auROC values
    model.load_weights(os.path.join(save_dir, 'AUC_' + model_name))
    # Calculate the video and accelerometer features for the validation data and evaluate the results
    vidbox_ft, acc_ft, all_labels, validgen = calculate_features(model, data_path_test, batch_size, load_zip_memory,
                                                                 acc_type, activity_filter, subjects_test)
    calculate_ROC(vidbox_ft, acc_ft, all_labels, validgen, filepath=save_dir)
    plot_features(vidbox_ft, acc_ft, all_labels, validgen, filepath=save_dir)
    calculate_mAP(vidbox_ft, acc_ft, all_labels, validgen, filepath=save_dir)
    metrics_vs_n_people(vidbox_ft, acc_ft, all_labels, filepath=save_dir)
    # Save the results
    np.savez_compressed(os.path.join(save_dir, 'features'), vidbox_ft=vidbox_ft, acc_ft=acc_ft, all_labels=all_labels)
Example #30
    def fit(self, tfrecord_path=TFRECORD_PATH,
            model_dir=MODEL_DIR, num_steps=-1,
            input_size=[CNN_FRAME_SIZE, CNN_VIDEO_HEIGHT, CNN_VIDEO_WIDTH, 3],
            batch_size=BATCH_SIZE, label_size=LABEL_SIZE,
            learning_rate=LEARNING_RATE, num_test_batches=NUM_TEST_BATCHES,
            display_train_loss_step=DISPLAY_TRAIN_LOSS_STEP,
            display_test_loss_step=DISPLAY_TEST_LOSS_STEP):
        """
        Fit CNN model.
        """

        # Initialize model paths.
        model_path = model_dir + "/model.ckpt"
        self.init_model_paths(model_path)
        self.phase = "train"
        self.batch_size = batch_size

        # Initialize model.
        tf.reset_default_graph()
        self.build_model(input_size, label_size)

        # Create loss.
        with tf.variable_scope("loss_error"):
            loss_function, cross_entropy_classes, cross_entropy_action = \
                self.build_loss()

        # Create optimization function.
        optimizer = self.optimizer(loss_function, learning_rate)

        # Create summaries.
        train_writer, test_writer, loss_summary = self.create_summaries(
            loss_function)

        # Start train session.
        self.open_session()
        train_info = self.load_train_info()
        self.load_graph()

        # Create batch generators.
        train_generator = BatchGenerator(
            "train", self.sess, tfrecord_path, self.input_size[0],
            self.input_size[1], self.input_size[2], batch_size)
        test_generator = BatchGenerator(
            "validation", self.sess, tfrecord_path, self.input_size[0],
            self.input_size[1], self.input_size[2], batch_size)

        while train_info["step"] < num_steps or num_steps == -1:
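            # num_steps == -1 means no step limit: keep training indefinitely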
            # Get train batch.
            forgd_samples, backd_samples, labels = train_generator.get_next()

            if train_info["step"] % display_train_loss_step == 0:
                train_loss_s, error_classes, error_action, loss_train_val, \
                    _opt_val = self.sess.run(
                        [loss_summary, cross_entropy_classes,
                            cross_entropy_action, loss_function, optimizer],
                        feed_dict={
                            self.input["input_video"]: forgd_samples,
                            self.input["input_background_video"]:
                            backd_samples,
                            self.input["input_label"]: labels})
                train_writer.add_summary(train_loss_s, train_info["step"])
                print('Step %i: train loss: %f,'
                      ' classes loss: %f, action loss: %f'
                      % (train_info["step"], loss_train_val,
                         error_classes, error_action))
            else:
                _opt_val, loss_train_val = self.sess.run(
                    [optimizer, loss_function],
                    feed_dict={self.input["input_video"]: forgd_samples,
                               self.input["input_background_video"]:
                               backd_samples,
                               self.input["input_label"]: labels})

            self.save_train_info(train_info)
            train_writer.flush()

            # Display test loss and input/output images.
            if train_info["step"] % display_test_loss_step == 0:
                test_loss_list = []
                error_classes_list = []
                error_action_list = []

                batch_index = 0
                while batch_index < num_test_batches:
                    forgd_samples, backd_samples, labels = \
                        test_generator.get_next()

                    batch_index += 1
                    if batch_index < num_test_batches:
                        loss_test_val, error_classes, error_action = \
                            self.sess.run(
                                [loss_function, cross_entropy_classes,
                                 cross_entropy_action],
                                feed_dict={
                                    self.input["input_video"]: forgd_samples,
                                    self.input["input_background_video"]:
                                    backd_samples,
                                    self.input["input_label"]: labels})
                    else:
                        loss_s, loss_test_val, error_classes, error_action = \
                            self.sess.run(
                                [loss_summary, loss_function,
                                 cross_entropy_classes, cross_entropy_action],
                                feed_dict={
                                    self.input["input_video"]: forgd_samples,
                                    self.input["input_background_video"]:
                                    backd_samples,
                                    self.input["input_label"]: labels})

                    test_loss_list.append(loss_test_val)
                    error_classes_list.append(error_classes)
                    error_action_list.append(error_action)
                loss_test_val = np.mean(test_loss_list)

                if loss_test_val < train_info["best_test_lost"]:
                    train_info["best_test_lost"] = loss_test_val
                    self.saver.save(self.sess, model_path,
                                    global_step=train_info["step"])

                print('Step %i: validation loss: %f,'
                      ' best validation loss: %f, classes loss: %f, '
                      'action loss: %f'
                      % (train_info["step"],
                         loss_test_val, train_info["best_test_lost"],
                         np.mean(error_classes_list),
                         np.mean(error_action_list)))
                test_writer.add_summary(loss_s, train_info["step"])
                test_writer.flush()
                self.save_train_info(train_info)

            train_info["step"] += 1
        self.close_session()
Example #31
configs.DEFINE_string("train_datafile", None,"Training file")
configs.DEFINE_float("validation_size",0.0,"Size of validation set as %")
configs.DEFINE_integer("seed",None,"Seed for deterministic training")
configs.DEFINE_float("rnn_loss_weight",None,"How much more to weight the kth example")
config = configs.get_configs()

if config.train_datafile is None:
    config.train_datafile = config.datafile

train_path = get_data_path(config.data_dir,config.train_datafile)

print("Loading batched data ...")

batches = BatchGenerator(train_path, config,
                         config.batch_size,config.num_unrollings,
                         validation_size=config.validation_size,
                         randomly_sample=True)


for i in range(10):
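    # print each batch's attributes, sequence lengths, masks, and targets to sanity-check the generator output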
    b = batches.next_batch()
    print("-----------------------------------------------------")
    print("----Attributes: ")
    print(b.attribs)
    print("----Sequence Lengths: ")
    print(b.seq_lengths)
    print("----Train Weights: ")
    print(b.train_mask)
    print("----Valid Weights: ")
    print(b.valid_mask)
    print("----Targets: ")
Example #32
cache_path = os.path.splitext(train_path)[0] + '.cache'

print("Loading training data ...")

end_date = config.end_date

############################################################################
#   If cached data doesn't exist, build it
############################################################################
if not os.path.exists(cache_path) or config.use_cache is False:
    print("Generating Data from Scratch")

    config.end_date = 999901

    data_bg = BatchGenerator(train_path, config,
          config.batch_size, config.num_unrollings,
          validation_size=config.validation_size,
          randomly_sample=False)

    train_bg = data_bg.train_batches()
    valid_bg = data_bg.valid_batches()

    print("Grabbing tabular data from batch generator")
    X_train_full, Y_train_full, dates_train = get_tabular_data(train_bg)
    X_valid_full, Y_valid_full, dates_valid = get_tabular_data(valid_bg)

    print("Saving tabular data to cache")    
    # JDA 10/27/16: Save these objects to cache here
    if not os.path.exists(cache_path):
       os.mkdir(cache_path)
    np.save(os.path.join(cache_path, 'X_train_full.npy'), X_train_full )
    np.save(os.path.join(cache_path, 'Y_train_full.npy'), Y_train_full )
Example #33
def train(num_layers, units_per_layer):
    print('download and read data')
    filename = maybe_download('text8.zip', 31344016)

    with tf.Session(graph=tf.Graph()) as validation_session:
        validation_model = ReverseSeqValidationSummaryModel(validation_session.graph)
        validation_session.run(tf.initialize_all_variables())

        with tf.Session(graph=tf.Graph()) as sess:
            # Create model.
            print("Creating %d layers of %d units." % (num_layers, units_per_layer))
            model = create_model(sess, num_layers, units_per_layer, False, DECODER_FEED_PREVIOUS)

            # Read data
            text = read_data(filename)
            # create datasets
            valid_size = 10000
            valid_text = text[:valid_size]
            train_text = text[valid_size:]
            # create batch generators
            validation_batch = BatchGenerator(valid_text, 1, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH,
                                              reverse_encoder_input=REVERSE_ENCODER_INPUT)
            if TRAIN_BATCH_TYPE == UseTrainBatchType.use_english_words:
                train_batch = BatchGenerator(train_text, BATCH_SIZE, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH,
                                             reverse_encoder_input=REVERSE_ENCODER_INPUT)
            elif TRAIN_BATCH_TYPE == UseTrainBatchType.use_random_train_words:
                train_batch = RandomWordsBatchGenerator(BATCH_SIZE, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH,
                                                        reverse_encoder_input=REVERSE_ENCODER_INPUT)
            else:
                train_batch = ReverseStringBatchGenerator(BATCH_SIZE, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH,
                                                          reverse_encoder_input=REVERSE_ENCODER_INPUT)
                validation_batch = ReverseStringBatchGenerator(1, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH,
                                                               reverse_encoder_input=REVERSE_ENCODER_INPUT)


            # This is the training loop.
            step_time, loss = 0.0, 0.0
            current_step = model.global_step.eval() + 1
            print('starting from step %i' % current_step)
            previous_losses = []
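            # the encoder state returned by model.step is carried over and fed back in on the next step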
            enc_state = model.initial_enc_state.eval()
            run_data_dir = run_data_directory(num_layers, units_per_layer)
            while True:
                # Get a batch and make a step.
                start_time = time.time()
                encoder_inputs, decoder_inputs, decoder_weights = train_batch.next()
                _, step_loss, enc_state = model.step(sess, current_step, encoder_inputs, decoder_inputs, decoder_weights,
                                                     enc_state, DROPOUT_PROB, False)
                step_time += (time.time() - start_time) / STEPS_PER_CHECKPOINT
                loss += step_loss / STEPS_PER_CHECKPOINT
                current_step += 1
                # Once in a while, we save checkpoint, print statistics, and run evals.
                if current_step % STEPS_PER_CHECKPOINT == 0:
                    # Print statistics for the previous epoch.
                    perplexity = math.exp(loss) if loss < 300 else float('inf')
                    print("global step %d learning rate %.4f step-time %.2f loss %.3f perplexity "
                          "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
                                    step_time, loss, perplexity))
                    # Decrease learning rate if no improvement was seen over last 3 times.
                    if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                        sess.run(model.learning_rate_decay_op)
                    previous_losses.append(loss)
                    # Save checkpoint and zero timer and loss.
                    checkpoint_path = os.path.join(run_data_dir, 'state')
                    model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                    step_time, loss = 0.0, 0.0
                    # Run evals on validation set and print their perplexity.
                    val_perp = validate_sentence(sess, model, validation_batch, enc_state, current_step)
                    summary_str = validation_model.merged_validation.eval(
                        {validation_model.validation_perp: val_perp if val_perp < 500 else 500 },
                        validation_session)
                    model.summ_writer.add_summary(summary_str, current_step)
                    sys.stdout.flush()
Example #34
 def setUp(self):
     self.batch_gen = BatchGenerator(DATA_PATH, VAL_DATA_PATH, 2)
     self.batch_gen.load_data()
Example #35
def run_test():
    # test batch generation
    print('download and read data')
    filename = maybe_download('text8.zip', 31344016)
    # Read data
    text = read_data(filename)
    # create datasets
    valid_size = 1000
    valid_text = text[:valid_size]
    train_text = text[valid_size:]
    # train_size = len(train_text)
    # create batch generators
    train_batches = BatchGenerator(train_text, BATCH_SIZE, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH, reverse_encoder_input=True)
    valid_batches = BatchGenerator(valid_text, 1, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH)

    # print(BatchGenerator.characters(train_batches.next()[0]))
    print('test main batch generator')
    e_bs, d_bs, dw_bs = train_batches.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)
    e_bs, d_bs, dw_bs = train_batches.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)
    e_bs, d_bs, dw_bs = valid_batches.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)
    e_bs, d_bs, dw_bs = valid_batches.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)

    print('test random english generator')
    random_batch = RandomWordsBatchGenerator(2, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH,
                                             reverse_encoder_input=False)
    for _ in range(10):
        e_bs, d_bs, dw_bs = random_batch.next()
        print(BatchGenerator.batches2string(e_bs))
        print(BatchGenerator.batches2string(d_bs))
        BatchGenerator.verify_weights(d_bs, dw_bs)

    print('test random string gen with padding')
    random_str_batch = ReverseStringBatchGenerator(1, 8, 8,
                                             reverse_encoder_input=False)
    e_bs, d_bs, dw_bs = random_str_batch.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)
    random_str_batch = ReverseStringBatchGenerator(2, 8, 16,
                                             reverse_encoder_input=False)
    e_bs, d_bs, dw_bs = random_str_batch.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)