Пример #1
0
def test_nowindow(input_seq, batch_size, seq_len):
    """
    Check the no-shuffle option with non-overlapping windows.

    Goes over the dataset just once and compares every batch produced by the
    iterator against windows sliced directly out of ``input_seq``.

    Arguments:
        input_seq: numpy array holding the sequence, with time along axis 0
        batch_size: number of windows in each batch
        seq_len: number of time steps in each window
    """
    # Create the iterator; the target is the input shifted one step ahead,
    # wrapping around at the end of the sequence
    data_array = {'X': np.copy(input_seq),
                  'y': np.roll(input_seq, axis=0, shift=-1)}
    iterations = None
    it_array = SequentialArrayIterator(data_arrays=data_array, time_steps=seq_len,
                                       batch_size=batch_size, tgt_key='y', shuffle=False,
                                       total_iterations=iterations)

    # Cut off the last samples of the input sequence that don't fit in a batch
    no_samples = seq_len * batch_size * (len(input_seq) // seq_len // batch_size)
    # Build the expected target from the same roll that was handed to the
    # iterator. Slicing input_seq[1:no_samples + 1] instead comes up one
    # element short when len(input_seq) == no_samples, which made the lookup
    # for the last batch fail with an IndexError.
    target_seq = np.roll(input_seq, axis=0, shift=-1)[:no_samples]
    input_seq = input_seq[:no_samples]

    # Shape of one batch; it does not depend on the batch index
    reshape_dims = (batch_size, seq_len)
    if len(input_seq.shape) > 1:
        reshape_dims = (batch_size, seq_len, input_seq.shape[1])

    for idx, iter_val in enumerate(it_array):
        # Start of the array needs to be time_steps * idx * batch_size
        start_idx = seq_len * idx * batch_size
        idcs = np.arange(start_idx, start_idx + seq_len * batch_size) % input_seq.shape[0]

        # expected_x will have contiguous non-overlapping samples
        expected_x = input_seq[idcs].reshape(reshape_dims)
        expected_y = target_seq[idcs].reshape(reshape_dims)

        # We are not shuffling, consecutive samples are contiguous
        # They will also be non-overlapping
        assert np.array_equal(expected_x, iter_val['X'])
        assert np.array_equal(expected_y, iter_val['y'])
Пример #2
0
def test_shuffle(input_seq, batch_size, seq_len):
    """
    Check the shuffle option with non-overlapping windows, over a single pass.

    Every window yielded by the iterator must be one of the expected
    non-overlapping windows, every expected window must be yielded, and fewer
    than half of the windows may sit at their unshuffled position.
    """
    def sample2char(sample):
        # Render a numpy array as a single string so that it is hashable and
        # can be stored in a python set or used as a dictionary key
        return ''.join('%1.2f' % value for value in sample.flatten())

    # create iterator
    arrays = {'X': np.copy(input_seq)}
    arrays['y'] = np.roll(input_seq, axis=0, shift=-1)
    it_array = SequentialArrayIterator(data_arrays=arrays,
                                       time_steps=seq_len,
                                       batch_size=batch_size,
                                       tgt_key='y',
                                       shuffle=True,
                                       total_iterations=None)

    # Drop the trailing time steps that do not fill a complete batch
    no_samples = batch_size * (len(input_seq) // seq_len // batch_size)
    input_seq = input_seq[:seq_len * no_samples]

    # One row per window; keep the feature axis when the input has one
    window_shape = (no_samples, seq_len) + tuple(input_seq.shape[1:2])
    expected_x = input_seq.reshape(window_shape)

    # Map the text form of each window to its unshuffled position, then track
    # which windows have not been seen yet
    sample2loc = {}
    for position, window in enumerate(expected_x):
        sample2loc[sample2char(window)] = position
    unseen = set(sample2loc)

    unmoved = 0
    for batch_no, batch in enumerate(it_array):
        first_position = batch_no * batch_size
        # for every sample in this batch
        for offset, sample in enumerate(batch['X']):
            key = sample2char(sample)
            assert key in unseen  # check sequence is valid
            unseen.discard(key)
            if sample2loc[key] == first_position + offset:
                unmoved += 1
    assert unmoved < (len(expected_x) * .5)  # check shuffle happened
    assert not unseen  # check every sequence appeared in iterator
Пример #3
0
def test_rolling_window(input_seq, batch_size, seq_len, strides):
    """
    Check that the rolling window works.

    For every batch, the first two samples must be offset from each other by
    ``strides`` time steps: the tail of sample 0 equals the head of sample 1,
    for both the input ('X') and the target ('y').
    """
    # Throw away the trailing section that doesn't fit in a batch
    usable_steps = seq_len * batch_size * (len(input_seq) // seq_len // batch_size)
    input_seq = input_seq[:usable_steps]

    shifted_seq = np.roll(input_seq, axis=0, shift=-1)
    it_array = SequentialArrayIterator(data_arrays={'X': input_seq, 'y': shifted_seq},
                                       time_steps=seq_len,
                                       stride=strides,
                                       batch_size=batch_size,
                                       tgt_key='y',
                                       shuffle=False)

    overlap = seq_len - strides
    for batch in it_array:
        for key in ('X', 'y'):
            values = batch[key]
            # Sample 1 starts `strides` steps after sample 0
            assert np.array_equal(values[0, strides:seq_len],
                                  values[1, :overlap])
Пример #4
0
# Optional command-line switch; with action='store_true' the parsed value is
# True only when --use_lut is passed
parser.add_argument('--use_lut',
                    action='store_true',
                    help='choose to use lut as first layer')
# NOTE(review): set_defaults() is called with no arguments, so it does not
# appear to change any default - confirm whether it can be dropped
parser.set_defaults()
args = parser.parse_args()

# these hyperparameters are from the paper
# batch_size is assigned after parsing, so it replaces whatever value the
# parser stored in args.batch_size
args.batch_size = 50
time_steps = 150
hidden_size = 500

# download penn treebank
tree_bank_data = PTB(path=args.data_dir)
ptb_data = tree_bank_data.load_data()
# Training iterator over the 'train' split; the number of iterations is taken
# from args.num_iterations
train_set = SequentialArrayIterator(ptb_data['train'],
                                    batch_size=args.batch_size,
                                    time_steps=time_steps,
                                    total_iterations=args.num_iterations)

# Validation iterator over the 'valid' split; no total_iterations is passed
valid_set = SequentialArrayIterator(ptb_data['valid'],
                                    batch_size=args.batch_size,
                                    time_steps=time_steps)

# Placeholders are created from the training iterator
inputs = train_set.make_placeholders()
# The Y axis length is set to the number of entries in the PTB vocabulary
ax.Y.length = len(tree_bank_data.vocab)


def expand_onehot(x):
    """Return ``x`` passed through ``ng.one_hot`` along the ``ax.Y`` axis."""
    one_hot_encoded = ng.one_hot(x, axis=ax.Y)
    return one_hot_encoded


# weight initialization
Пример #5
0
# Command-line parser built from the module docstring; batch_size and
# num_iterations get script-specific defaults
parser = NgraphArgparser(__doc__)
parser.set_defaults(batch_size=128, num_iterations=2000)
args = parser.parse_args()

# model parameters
time_steps = 5
hidden_size = 256
gradient_clip_value = 5

# download penn treebank
# set shift_target to be False, since it is going to predict the same sequence
tree_bank_data = PTB(path=args.data_dir, shift_target=False)
ptb_data = tree_bank_data.load_data()
# Training iterator over the 'train' split, limited to args.num_iterations
# NOTE(review): reverse_target / get_prev_target presumably ask the iterator
# for a reversed target sequence and the previous target - confirm against
# SequentialArrayIterator
train_set = SequentialArrayIterator(ptb_data['train'],
                                    batch_size=args.batch_size,
                                    time_steps=time_steps,
                                    total_iterations=args.num_iterations,
                                    reverse_target=True,
                                    get_prev_target=True)
# Validation iterator over the 'valid' split, with the same options as the
# training iterator but a fixed total_iterations of 10
valid_set = SequentialArrayIterator(ptb_data['valid'],
                                    batch_size=args.batch_size,
                                    time_steps=time_steps,
                                    total_iterations=10,
                                    reverse_target=True,
                                    get_prev_target=True)

# Placeholders are created from the training iterator
inputs = train_set.make_placeholders()
# The Y axis length is set to the number of entries in the PTB vocabulary
ax.Y.length = len(tree_bank_data.vocab)


def generate_samples(inputs, encode, decode, num_time_steps):
    """
Пример #6
0
# Build an iterator that gets seq_len long chunks of characters
# Stride is by how many characters the window moves in each step
# if stride is set to seq_len, windows are non-overlapping
# Here the stride is an eighth of seq_len (integer division)
stride = seq_len // 8
# Input 'X' is a copy of the data; target 'y' is the same data shifted by one
# position, wrapping around at the end.
# NOTE(review): np.roll is called without axis, so the array is flattened
# before shifting - this assumes shakes.train / shakes.test are 1-D; confirm
shakes_data_train = {
    'X': np.copy(shakes.train),
    'y': np.roll(np.copy(shakes.train), shift=-1)
}
shakes_data_test = {
    'X': np.copy(shakes.test),
    'y': np.roll(np.copy(shakes.test), shift=-1)
}
# Training iterator: unshuffled, limited to num_iterations
shakes_train = SequentialArrayIterator(data_arrays=shakes_data_train,
                                       total_iterations=num_iterations,
                                       time_steps=seq_len,
                                       batch_size=batch_size,
                                       stride=stride,
                                       include_iteration=True,
                                       tgt_key='y',
                                       shuffle=False)
# Test iterator: same settings as the training iterator, but no
# total_iterations is passed
shakes_test = SequentialArrayIterator(data_arrays=shakes_data_test,
                                      time_steps=seq_len,
                                      batch_size=batch_size,
                                      stride=stride,
                                      include_iteration=True,
                                      tgt_key='y',
                                      shuffle=False)
# Our input is of size (batch_size, seq_len)
# batch_axis must be named N
batch_axis = ng.make_axis(length=batch_size, name="N")
# time_axis must be named REC
time_axis = ng.make_axis(length=seq_len, name="REC")
Пример #7
0
# Default gen_be to False before parsing the command line
parser.set_defaults(gen_be=False)
args = parser.parse_args()

# these hyperparameters are from the paper
# batch_size is assigned after parsing, so it replaces whatever value the
# parser stored in args.batch_size
args.batch_size = 50
time_steps = 5
hidden_size = 10
gradient_clip_value = 15

# download penn treebank
# set shift_target to be False, since it is going to predict the same sequence
tree_bank_data = PTB(path=args.data_dir, shift_target=False)
ptb_data = tree_bank_data.load_data()
# Training iterator over the 'train' split, limited to args.num_iterations
# NOTE(review): reverse_target / get_prev_target presumably ask the iterator
# for a reversed target sequence and the previous target - confirm against
# SequentialArrayIterator
train_set = SequentialArrayIterator(ptb_data['train'],
                                    batch_size=args.batch_size,
                                    time_steps=time_steps,
                                    total_iterations=args.num_iterations,
                                    reverse_target=True,
                                    get_prev_target=True)

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# model initialization
# Encoder side: one-hot preprocessing along ax.Y followed by a Tanh recurrent
# layer that is built with return_sequence=False
one_hot_enc = Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y))
enc = Recurrent(hidden_size,
                init,
                activation=Tanh(),
                reset_cells=True,
                return_sequence=False)
# Decoder side uses the same one-hot preprocessing as the encoder
one_hot_dec = Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y))
dec = Recurrent(hidden_size,