Example #1
import tensorpack.dataflow as td


class ConceptualCaptionsDataset:  # hypothetical class name; the original excerpt shows only __init__
    def __init__(self, num_split):
        lmdb_file = "/srv/share/vgoswami8/conceptual_captions/training_feat_all.lmdb"
        caption_path = "/srv/share/vgoswami8/conceptual_captions/caption_train.json"
        print("Loading from %s" % lmdb_file)

        # load the serialized LMDB dataset without shuffling
        ds = td.LMDBSerializer.load(lmdb_file, shuffle=False)
        # each of the num_split splits covers roughly 1/num_split of the datapoints
        self.num_dataset = int(len(ds) / num_split) + 1
        # prefetch datapoints in a separate process over ZMQ
        ds = td.PrefetchDataZMQ(ds, nr_proc=1)
        # cap the flow at one split's worth of datapoints, keeping iterator
        # state across epochs
        ds = td.FixedSizeData(ds, self.num_dataset, keep_state=True)
        self.ds = ds
        self.ds.reset_state()
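A minimal usage sketch, assuming the class wrapper above (the class name itself is hypothetical; only __init__ appears in the original excerpt) and tensorpack's get_data() iteration API:

# Hypothetical usage: iterate over one split's worth of datapoints.
loader = ConceptualCaptionsDataset(num_split=8)
for datapoint in loader.ds.get_data():
    features = datapoint  # one deserialized LMDB record (features + caption)
    break  # inspect only the first record here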
Example #2
import numpy as np
import tensorflow as tf
from tensorpack import dataflow
# House3DFilterData and BatchDataWithPad are project-local helpers, not shown in this excerpt


def get_dataflow(files, params):
    """
    Build a tensorflow Dataset from appropriate tfrecords files.
    :param files: list a file paths corresponding to appropriate tfrecords data
    :param params: parsed arguments
    :param is_training: bool, true for training.
    :return: (nextdata, num_samples).
    nextdata: list of tensorflow ops that produce the next input with the following elements:
    true_states, global_map, init_particles, observations, odometries, is_first_step.
    See House3DTrajData.get_data for definitions.
    """
    mapmode = params.mapmode
    obsmode = params.obsmode
    batchsize = params.batchsize
    trajlen = params.trajlen
    num_particles = params.num_particles
    # num_directions is undefined in the original excerpt; assumed here to be a
    # parsed argument like the others
    num_directions = params.num_directions

    # build initial covariance matrix of particles, in pixels and radians
    particle_std = params.init_particles_std.copy()
    particle_std[0] = particle_std[0] / params.map_pixel_in_meters  # convert meters to pixels
    particle_std2 = np.square(particle_std)  # variance
    init_particles_cov = np.diag(particle_std2[(0, 0, 1),])  # diag([var_xy, var_xy, var_th])
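    # e.g. (hypothetical values) init_particles_std = [0.3 m, 0.523 rad] with
    # map_pixel_in_meters = 0.05 gives particle_std = [6.0 px, 0.523 rad] and
    # init_particles_cov = diag([36.0, 36.0, 0.2735])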

    df = House3DFilterData(
        files, mapmode, obsmode, trajlen, num_particles,
        params.init_particles_distr, init_particles_cov,
        seed=params.seed
    )

    df = dataflow.FixedSizeData(df, size=df.size(), keep_state=False)

    df = BatchDataWithPad(df, batchsize, padded_indices=(2, 3))

    num_samples = df.size()

    df.reset_state()

    # # test dataflow
    # df = dataflow.TestDataSpeed(dataflow.PrintData(df), 100)
    # df.start()

    types = [tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.bool]
    sizes = [(batchsize, trajlen, 3),  # True states
             (batchsize, num_particles, 3),  # Initial particles
             (batchsize, None, None, 2),  # Global map: wall, door
             (batchsize, None, None, 3),  # Special maps: empty space map, wall margin map, room type map
             (batchsize, trajlen, 56, 56, 1),  # Depth images, 1 channel
             (batchsize, trajlen, 3),  # Odometry
             (batchsize, trajlen, num_directions),  # Rotation angles
             (batchsize, trajlen, num_directions, 56, 2),  # Points on each wall filter for each direction
             (batchsize,)]  # is_first_step

    # turn it into a tf dataset
    def tuplegen():
        for dp in df.get_data():
            yield tuple(dp)

    dataset = tf.data.Dataset.from_generator(tuplegen, tuple(types), tuple(sizes))
    iterator = dataset.make_one_shot_iterator()
    nextdata = iterator.get_next()

    return nextdata, num_samples
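A minimal usage sketch (TF1 graph mode; train_files and a populated params namespace are assumed to be prepared by the caller):

# Hypothetical driver: build the pipeline, then pull one batch.
nextdata, num_samples = get_dataflow(train_files, params)
with tf.Session() as sess:
    batch = sess.run(nextdata)  # tuple of arrays matching the sizes list above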
Example #3
import numpy as np
import tensorflow as tf
from tensorpack import dataflow
# House3DTrajData, BatchDataWithPad and BreakForBPTT are project-local helpers, not shown here


def get_dataflow(files, params, is_training):
    """
    Build a tensorflow Dataset from appropriate tfrecords files.
    :param files: list of file paths corresponding to appropriate tfrecords data
    :param params: parsed arguments
    :param is_training: bool, true for training.
    :return: (nextdata, num_samples).
    nextdata: list of tensorflow ops that produce the next input with the following elements:
    true_states, global_map, init_particles, observations, odometries, is_first_step.
    See House3DTrajData.get_data for definitions.
    num_samples: number of samples that make an epoch
    """

    mapmode = params.mapmode
    obsmode = params.obsmode
    batchsize = params.batchsize
    num_particles = params.num_particles
    trajlen = params.trajlen
    bptt_steps = params.bptt_steps

    # build initial covariance matrix of particles, in pixels and radians
    particle_std = params.init_particles_std.copy()
    particle_std[0] = particle_std[0] / params.map_pixel_in_meters  # convert meters to pixels
    particle_std2 = np.square(particle_std)  # element-wise variance
    init_particles_cov = np.diag(particle_std2[(0, 0, 1),])  # diag([var_xy, var_xy, var_th])

    df = House3DTrajData(
        files,
        mapmode,
        obsmode,
        trajlen,
        num_particles,
        params.init_particles_distr,
        init_particles_cov,
        # use the explicit seed when given; otherwise a fixed validation seed,
        # or a random seed (None) during training
        seed=(params.seed if params.seed is not None and params.seed > 0 else
              (params.validseed if not is_training else None)))
    # data: true_states, global_map, init_particles, observation, odometry

    # make it a multiple of batchsize
    df = dataflow.FixedSizeData(df,
                                size=(df.size() // batchsize) * batchsize,
                                keep_state=False)

    # shuffle
    if is_training:
        df = dataflow.LocallyShuffleData(
            df, 100 * batchsize)  # buffer_size = 100 * batchsize

    # repeat data for the number of epochs
    df = dataflow.RepeatedData(df, params.epochs)

    # batch
    df = BatchDataWithPad(df, batchsize, padded_indices=(1, ))

    # break trajectory into multiple segments for BPTT training. Augment df with is_first_step indicator
    df = BreakForBPTT(df,
                      timed_indices=(0, 3, 4),
                      trajlen=trajlen,
                      bptt_steps=bptt_steps)
    # data: true_states, global_map, init_particles, observation, odometry, is_first_step

    num_samples = df.size() // params.epochs

    df.reset_state()

    # # test dataflow
    # df = dataflow.TestDataSpeed(dataflow.PrintData(df), 100)
    # df.start()

    obs_ch = {'rgb': 3, 'depth': 1, 'rgb-depth': 4}
    map_ch = {
        'wall': 1,
        'wall-door': 2,
        'wall-roomtype': 10,
        'wall-door-roomtype': 11
    }  # every semantic is a channel
    types = [
        tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.bool
    ]
    sizes = [
        (batchsize, bptt_steps, 3),                        # true states
        (batchsize, None, None, map_ch[mapmode]),          # global map
        (batchsize, num_particles, 3),                     # initial particles
        (batchsize, bptt_steps, 56, 56, obs_ch[obsmode]),  # observations
        (batchsize, bptt_steps, 3),                        # odometry
        (),                                                # is_first_step
    ]

    # turn it into a tf dataset
    def tuplegen():
        for dp in df.get_data():
            yield tuple(dp)

    dataset = tf.data.Dataset.from_generator(tuplegen, tuple(types),
                                             tuple(sizes))
    iterator = dataset.make_one_shot_iterator()  # only read once
    nextdata = iterator.get_next()

    return nextdata, num_samples
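A minimal usage sketch for this training variant (again TF1 graph mode; train_files and params are assumed):

# Hypothetical driver: fetch one BPTT segment of one training batch.
nextdata, num_samples = get_dataflow(train_files, params, is_training=True)
with tf.Session() as sess:
    (true_states, global_map, init_particles,
     observation, odometry, is_first_step) = sess.run(nextdata)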