Example #1
def make_iterators(dbpedia_data, num_iterations, batch_size):
    train_set = ArrayIterator(dbpedia_data['train'],
                              batch_size=batch_size,
                              total_iterations=num_iterations,
                              shuffle=True)
    test_set = ArrayIterator(dbpedia_data['test'], batch_size=batch_size)
    return train_set, test_set
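As a usage sketch only: assuming the neon-frontend ArrayIterator, where each next() call yields one batch as a dict of arrays, the returned iterators could be consumed like this (dbpedia_data and the counts are placeholders, not part of this example):

# Hypothetical usage; dbpedia_data is assumed to be the already-loaded data dict.
train_set, test_set = make_iterators(dbpedia_data, num_iterations=2000, batch_size=128)
for step in range(2000):
    batch = next(train_set)   # dict of per-field arrays for one batch
    # ... feed `batch` to a bound training computation here ...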
Example #2
def get_fake_data(dataset, batch_size, n_iter):
    x_train, y_train = generate_data(dataset, batch_size)

    train_data = {'image': {'data': x_train, 'axes': ('batch', 'C', 'height', 'width')},
                  'label': {'data': y_train, 'axes': ('batch',)}}

    train_set = ArrayIterator(train_data, batch_size, total_iterations=n_iter)
    inputs = train_set.make_placeholders(include_iteration=True)
    return inputs, train_data, train_set
Example #3
def plot_generated(trainer):
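    # Note: relies on module-level `dataset`, `batch_size`, `args`, and the
    # np / plt / os imports, which are assumed to be defined elsewhere in the script.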
    # Get a batch from the train set
    train_set_one_epoch = ArrayIterator(dataset.train, batch_size, shuffle=False)
    gen_series = trainer.predict(train_set_one_epoch, num_batches=1)
    train_set_one_epoch.reset()

    # Get an example from the batch
    gen_series = gen_series[4]

    if args.backward:
        # If args.backward is set, the autoencoder would have produced the input sequence in reverse.
        # We flip it again to match the true series
        gen_series = gen_series[::-1, :]

    true_series = next(train_set_one_epoch)['X'][4]

    # Plot the true and generated values of each series
    ncols = int(np.ceil((dataset.n_sensors + dataset.n_operating_modes) * 1.0 / 3))
    fig, ax = plt.subplots(ncols, 3)
    fig.set_figheight(20)
    fig.set_figwidth(10)

    for i in range(dataset.n_operating_modes):
        plt.subplot(ncols, 3, i + 1)
        if i == 0:
            plt.plot(true_series[:, i], label="true", color="blue")
        else:
            plt.plot(true_series[:, i], color="blue")
        if i == 0:
            plt.plot(gen_series[:, i], label="gen", color="red")
        else:
            plt.plot(gen_series[:, i], color="red")
        plt.title("Operating mode {}".format(i + 1))

    for i in range(dataset.n_sensors):
        plt.subplot(ncols, 3, dataset.n_operating_modes + i + 1)
        plt.plot(true_series[:, dataset.n_operating_modes + i], color="blue")
        plt.plot(gen_series[:, dataset.n_operating_modes + i], color="red")
        plt.title("Sensor {}".format(i + 1))
    fig.legend()

    plt.tight_layout()
    fig.savefig(os.path.join(args.results_dir, "generated_series.png"))
Example #4
def get_fake_data(dataset, batch_size, num_iterations, seed=None):
    x_train, y_train = generate_data(dataset, batch_size, rand_seed=seed)

    train_data = {
        'image': {
            'data': x_train,
            'axes': ('batch', 'C', 'H', 'W')
        },
        'label': {
            'data': y_train,
            'axes': ('batch', )
        }
    }

    train_set = ArrayIterator(train_data,
                              batch_size,
                              total_iterations=num_iterations)
    inputs = train_set.make_placeholders(include_iteration=True)
    return inputs, train_data, train_set
Example #5
def get_fake_cifar(batch_size, n_iter):
    cifar = FakeCIFAR()
    cifar.reset(0)
    batch_xs, batch_ys = cifar.train.next_batch(batch_size)
    x_train = np.vstack(batch_xs).reshape(-1, 3, 32, 32)
    y_train = np.vstack(batch_ys).ravel()

    train_data = {
        'image': {
            'data': x_train,
            'axes': ('batch', 'C', 'height', 'width')
        },
        'label': {
            'data': y_train,
            'axes': ('batch', )
        }
    }

    train_set = ArrayIterator(train_data, batch_size, total_iterations=n_iter)
    inputs = train_set.make_placeholders(include_iteration=True)
    return inputs, train_data, train_set
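A minimal sketch of how the returned placeholders and iterator are typically wired together, patterned on the make_bound_computation calls in Examples #7 and #12 (the summed output is a stand-in, and it is assumed that the batches yielded by the iterator carry the same keys as the placeholders):

from contextlib import closing
import ngraph as ng
import ngraph.transformers as ngt
from ngraph.frontends.neon import make_bound_computation

inputs, train_data, train_set = get_fake_cifar(batch_size=8, n_iter=10)
outputs = {'pixel_sum': ng.sum(inputs['image'], out_axes=())}  # stand-in output

with closing(ngt.make_transformer()) as transformer:
    computation = make_bound_computation(transformer, outputs, inputs)
    for _ in range(10):
        result = computation(next(train_set))  # one batch per call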
Example #6
# parse command line arguments
parser = NgraphArgparser()
parser.add_argument(
    '--plot_interval',
    type=int,
    default=200,
    help='save generated images with a period of this many iterations')
parser.add_argument('--seed', type=int, default=0, help='random seed')
args = parser.parse_args()
np.random.seed(args.rng_seed)

args.batch_size = 32

# Create the dataloader
train_data, valid_data = MNIST(args.data_dir).load_data()
train_set = ArrayIterator(train_data, args.batch_size)

# noise source
noise_dim = (2, 1, 3, 3)
noise_generator = Noise(train_set.ndata,
                        shape=noise_dim + (args.batch_size, ),
                        seed=args.seed)

# generator network
g_scope = 'generator'
filter_init = GaussianInit(var=0.05)
relu = Rectlin(slope=0)

deconv_layers = [
    Deconvolution((1, 1, 16),
                  filter_init,
Example #7
def train_mnist_mlp(transformer_name,
                    data_dir=None,
                    rng_seed=12,
                    batch_size=128,
                    train_iter=10,
                    eval_iter=10):
    assert transformer_name in ['cpu', 'hetr']
    assert isinstance(rng_seed, int)

    # Apply this metadata to the graph regardless of transformer;
    # it is ignored in the non-HeTr case.
    hetr_device_ids = (0, 1)

    # use consistent rng seed between runs
    np.random.seed(rng_seed)

    # Data
    train_data, valid_data = MNIST(path=data_dir).load_data()
    train_set = ArrayIterator(train_data,
                              batch_size,
                              total_iterations=train_iter)
    valid_set = ArrayIterator(valid_data, batch_size)
    inputs = train_set.make_placeholders()
    ax.Y.length = 10

    # Model
    with ng.metadata(device_id=hetr_device_ids, parallel=ax.N):
        seq1 = Sequential([
            Preprocess(functor=lambda x: x / 255.),
            Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
            Affine(axes=ax.Y,
                   weight_init=GaussianInit(),
                   activation=Logistic())
        ])

        train_prob = seq1(inputs['image'])
        train_loss = ng.cross_entropy_binary(
            train_prob, ng.one_hot(inputs['label'], axis=ax.Y))

        optimizer = GradientDescentMomentum(0.1, 0.9)
        batch_cost = ng.sequential(
            [optimizer(train_loss),
             ng.mean(train_loss, out_axes=())])
        train_outputs = dict(batch_cost=batch_cost)

        with Layer.inference_mode_on():
            inference_prob = seq1(inputs['image'])
        errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                              inputs['label'])
        eval_loss = ng.cross_entropy_binary(
            inference_prob, ng.one_hot(inputs['label'], axis=ax.Y))
        eval_outputs = dict(cross_ent_loss=eval_loss, misclass_pct=errors)

    # Runtime
    with closing(
            ngt.make_transformer_factory(transformer_name)()) as transformer:
        train_computation = make_bound_computation(transformer, train_outputs,
                                                   inputs)
        loss_computation = make_bound_computation(transformer, eval_outputs,
                                                  inputs)

        train_costs = list()
        for step in range(train_iter):
            out = train_computation(next(train_set))
            train_costs.append(float(out['batch_cost']))

        ce_loss = list()
        for step in range(eval_iter):
            out = loss_computation(next(valid_set))
            ce_loss.append(np.mean(out['cross_ent_loss']))

        return train_costs, ce_loss
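A hypothetical invocation of the function above (the transformer name and iteration counts are illustrative):

# Illustrative call; 'cpu' runs the graph on a single device.
train_costs, ce_loss = train_mnist_mlp('cpu', batch_size=128,
                                       train_iter=10, eval_iter=10)
print(train_costs[-1], ce_loss[-1])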
Example #8
kernel_size = args.ksize
dropout = 1 - args.dropout  # amount to keep
seq_len = args.seq_len
batch_size = args.batch_size
n_epochs = args.epochs

music_dataset = Music(data_dir=args.datadir,
                      seq_len=seq_len,
                      dataset=args.dataset)
seq_len = music_dataset.seq_len
n_train = music_dataset.train['X']['data'].shape[0]
num_iterations = int(n_train * n_epochs * 1.0 / batch_size)
n_features = music_dataset.train['X']['data'].shape[2]

train_iterator = ArrayIterator(music_dataset.train,
                               batch_size,
                               total_iterations=num_iterations,
                               shuffle=True)
test_iterator = ArrayIterator(music_dataset.test, batch_size)

# Name and create axes
batch_axis = ng.make_axis(length=batch_size, name="N")
time_axis = ng.make_axis(length=seq_len, name="REC")
feature_axis = ng.make_axis(length=n_features, name="F")
out_axis = ng.make_axis(length=n_features, name="Fo")

in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
out_axes = ng.make_axes([batch_axis, time_axis, out_axis])

# Build placeholders for the created axes
inputs = dict(X=ng.placeholder(in_axes),
              y=ng.placeholder(out_axes),
Example #9
embeddingz = np.load(
    os.path.join(
        path_gen +
        "glove.trimmed.300.npz"))
embeddings = embeddingz['glove']
vocab_file = os.path.join(path_gen + 'vocab.dat')


print("creating training Set ")
train = get_data_array_squad_ngraph(params_dict, data_train, set_val='train')
dev = get_data_array_squad_ngraph(params_dict, data_dev, set_val='dev')
print('Train Set Size is', len(train['para']['data']))
print('Dev set size is', len(dev['para']['data']))


# Use Array Iterator for training set
train_set = ArrayIterator(train, batch_size=params_dict['batch_size'],
                          total_iterations=params_dict['num_iterations'])
# Use Array Iterator for validation set
valid_set = ArrayIterator(dev, batch_size=params_dict['batch_size'],
                          total_iterations=params_dict['num_iterations'])

# Make placeholders for training
inputs = train_set.make_placeholders(include_iteration=True)


# Encoding Layer
rlayer_1 = LSTM(hidden_size, init, activation=Tanh(), reset_cells=True,
                gate_activation=Logistic(), return_sequence=True)

# Embedding Layer
embed_layer = LookupTable(
    params_dict['vocab_size'],
Example #10
    affine_layer(2 * h_dim, Tanh(), name='d1')
]
if minibatch_discrimination:
    raise NotImplementedError
else:
    discriminator_layers.append(affine_layer(2 * h_dim, Tanh(), name='d2'))
discriminator_layers.append(affine_layer(1, Logistic(), name='d3'))
discriminator = Sequential(discriminator_layers)

# TODO discriminator pre-training

# dataloader
np.random.seed(1)
toy_gan_data = ToyGAN(batch_size, num_iterations)
train_data = toy_gan_data.load_data()
train_set = ArrayIterator(train_data, batch_size, num_iterations)
# reset seed for weights
np.random.seed(2)

# build network graph
inputs = train_set.make_placeholders()

z = inputs['noise_sample']
G = generator(z)  # generated sample

x = inputs['data_sample']
D1 = discriminator(x)  # discriminator output on real data sample

# cast G axes into x
G_t = ng.axes_with_order(G, reversed(G.axes))
G_cast = ng.cast_axes(G_t, x.axes)
Example #11
babi = BABI_Dialog(
    path=data_dir,
    task=args.task,
    oov=args.use_oov,
    use_match_type=args.use_match_type,
    cache_match_type=args.cache_match_type,
    cache_vectorized=args.cache_vectorized)

weight_saver = Saver()

# Set num iterations to 1 epoch since we loop over epochs & shuffle
ndata = babi.data_dict['train']['memory']['data'].shape[0]
num_iterations = ndata // args.batch_size

train_set = ArrayIterator(babi.data_dict['train'], batch_size=args.batch_size,
                          total_iterations=num_iterations)
inputs = train_set.make_placeholders()

memn2n = MemN2N_Dialog(
    babi.cands,
    babi.num_cands,
    babi.max_cand_len,
    babi.memory_size,
    babi.max_utt_len,
    babi.vocab_size,
    args.emb_size,
    args.batch_size,
    use_match_type=args.use_match_type,
    kb_ents_to_type=babi.kb_ents_to_type,
    kb_ents_to_cand_idxs=babi.kb_ents_to_cand_idxs,
    match_type_idxs=babi.match_type_idxs,
Example #12
discriminator_train_inputs = {'image': image, 'noise': z}

generator_train_outputs = {
    'batch_cost': mean_cost_g,
    'updates': updates_g,
    'generated': generated
}  # for plots
discriminator_train_outputs = {
    'batch_cost': mean_cost_d,
    'updates': updates_d,
    'grad_norm': mean_grad_norm
}

# create the dataloader
train_data, valid_data = MNIST(args.data_dir).load_data()
train_set = ArrayIterator(train_data, args.batch_size, args.num_iterations)

# noise source
noise_generator = Noise(shape=noise_dim + (args.batch_size, ),
                        seed=args.rng_seed)

with closing(ngt.make_transformer()) as transformer:

    train_computation_g = make_bound_computation(transformer,
                                                 generator_train_outputs,
                                                 generator_train_inputs)
    train_computation_d = make_bound_computation(transformer,
                                                 discriminator_train_outputs,
                                                 discriminator_train_inputs)

    # train loop
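    # (Hedged continuation: the original example is truncated at this point.
    #  The alternating update below is an illustrative sketch, not the original
    #  loop; the feed-dict keys follow the input dicts defined above, and
    #  `next(noise_generator)` is assumed to yield one noise batch.)
    for step in range(args.num_iterations):
        data = next(train_set)
        noise = next(noise_generator)
        d_out = train_computation_d({'image': data['image'], 'noise': noise})
        g_out = train_computation_g({'noise': noise})
        if step % 100 == 0:
            print(step, float(d_out['batch_cost']), float(g_out['batch_cost']))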
Example #13
    args = parser.parse_args()

    np.random.seed(args.rng_seed)

    # Create the dataloader
    if args.use_aeon:
        from data import make_aeon_loaders
        train_set, valid_set = make_aeon_loaders(args.data_dir,
                                                 args.batch_size,
                                                 args.num_iterations)
    else:
        from ngraph.frontends.neon import ArrayIterator  # noqa
        from ngraph.frontends.neon import CIFAR10  # noqa
        train_data, valid_data = CIFAR10(args.data_dir).load_data()
        train_set = ArrayIterator(train_data,
                                  args.batch_size,
                                  total_iterations=args.num_iterations)
        valid_set = ArrayIterator(valid_data, args.batch_size)

    # we need to ask the dataset to create an iteration
    # placeholder for our learning rate schedule
    inputs = train_set.make_placeholders(include_iteration=True)
    ax.Y.length = 10

    resnet = residual_network(args.stage_depth)

    learning_rate_policy = {
        'name': 'schedule',
        'schedule': [32000, 48000],
        'gamma': 0.1,
        'base_lr': 0.1
Example #14
args.batch_size = 128
time_steps = 128
hidden_size = 10
gradient_clip_value = 15
embed_size = 128
vocab_size = 20000
pad_idx = 0

# download IMDB
imdb_dataset = IMDB(path=args.data_dir,
                    sentence_length=time_steps,
                    pad_idx=pad_idx)
imdb_data = imdb_dataset.load_data()

train_set = ArrayIterator(imdb_data['train'],
                          batch_size=args.batch_size,
                          total_iterations=args.num_iterations)
valid_set = ArrayIterator(imdb_data['valid'], batch_size=args.batch_size)

inputs = train_set.make_placeholders()
ax.Y.length = imdb_dataset.nclass

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

if args.layer_type == "rnn":
    rlayer = Recurrent(hidden_size,
                       init,
                       activation=Tanh(),
                       reset_cells=True,
                       return_sequence=False)
Example #15
seq_len = args.seq_len
no_epochs = args.epochs
output_dim = 1

dataset = TurboFan(data_dir="../../data/", T=args.seq_len, skip=args.skip, max_rul_predictable=130)
feature_dim = dataset.n_features

if args.save_plots:
    dataset.plot_sample(out_folder, trajectory_id=10)

# Build input data iterables
# Yields an input array of Shape (batch_size, seq_len, input_feature_dim)
train_samples = len(dataset.train['X']['data'])
num_iterations = (no_epochs * train_samples) // batch_size

train_set = ArrayIterator(dataset.train, batch_size, total_iterations=num_iterations, shuffle=True)
train_set_one_epoch = ArrayIterator(dataset.train, batch_size, shuffle=False)
test_set = ArrayIterator(dataset.test, batch_size)

# Name and create axes
batch_axis = ng.make_axis(length=batch_size, name="N")
time_axis = ng.make_axis(length=seq_len, name="REC")
feature_axis = ng.make_axis(length=feature_dim, name="F")
out_axis = ng.make_axis(length=output_dim, name="Fo")

in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
out_axes = ng.make_axes([batch_axis, out_axis])

# Build placeholders for the created axes
inputs = dict(X=ng.placeholder(in_axes), y=ng.placeholder(out_axes),
              iteration=ng.placeholder(axes=()))
Example #16
if args.model_file is not None:
    model_file = os.path.expanduser(args.model_file)
else:
    model_file = None

wikimovies = WIKIMOVIES(args.data_dir,
                        subset=args.subset,
                        reparse=args.reparse,
                        mem_source=args.mem_mode)

ndata = wikimovies.data_dict['train']['query']['data'].shape[0]
num_iterations = ndata // args.batch_size

train_set = ArrayIterator(wikimovies.data_dict['train'],
                          batch_size=args.batch_size,
                          total_iterations=num_iterations)
test_set = ArrayIterator(wikimovies.data_dict['test'],
                         batch_size=args.batch_size)
inputs = train_set.make_placeholders()
vocab_axis = ng.make_axis(length=wikimovies.vocab_size, name='vocab_axis')

memn2n = KVMemN2N(num_iterations, args.batch_size, args.emb_size, args.nhops,
                  wikimovies.story_length, wikimovies.memory_size,
                  wikimovies.vocab_size, vocab_axis, args.use_v_luts)
# Compute answer predictions
a_pred, _ = memn2n(inputs)

loss = ng.cross_entropy_multi(a_pred,
                              ng.one_hot(inputs['answer'], axis=vocab_axis),
                              usebits=True)
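The loss above would typically be reduced and attached to an optimizer much as in Example #7; a hedged sketch (the optimizer choice and learning rate are illustrative, not taken from this example):

# Illustrative continuation, patterned on Example #7; hyperparameters are made up.
optimizer = GradientDescentMomentum(0.01, 0.9)
batch_cost = ng.sequential([optimizer(loss), ng.mean(loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)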
Example #17
babi = BABI_Dialog(path=data_dir,
                   task=args.task,
                   oov=args.use_oov,
                   use_match_type=args.use_match_type,
                   cache_match_type=args.cache_match_type,
                   cache_vectorized=args.cache_vectorized)

weight_saver = Saver()

# Set num iterations to 1 epoch since we loop over epochs & shuffle
ndata = babi.data_dict['train']['memory']['data'].shape[0]
num_iterations = ndata // args.batch_size

train_set = ArrayIterator(babi.data_dict['train'],
                          batch_size=args.batch_size,
                          total_iterations=num_iterations)
inputs = train_set.make_placeholders()

memn2n = MemN2N_Dialog(babi.cands,
                       babi.num_cands,
                       babi.max_cand_len,
                       babi.memory_size,
                       babi.max_utt_len,
                       babi.vocab_size,
                       args.emb_size,
                       args.batch_size,
                       use_match_type=args.use_match_type,
                       kb_ents_to_type=babi.kb_ents_to_type,
                       kb_ents_to_cand_idxs=babi.kb_ents_to_cand_idxs,
                       match_type_idxs=babi.match_type_idxs,
Example #18
from ngraph.frontends.neon import ax, loop_train, make_bound_computation, make_default_callbacks
from ngraph.frontends.neon import NgraphArgparser
from ngraph.frontends.neon import ArrayIterator

from cifar10 import CIFAR10
import ngraph.transformers as ngt

parser = NgraphArgparser(description='Train simple CNN on cifar10 dataset')
args = parser.parse_args()

np.random.seed(args.rng_seed)

# Create the dataloader
train_data, valid_data = CIFAR10(args.data_dir).load_data()
train_set = ArrayIterator(train_data,
                          args.batch_size,
                          total_iterations=args.num_iterations)
valid_set = ArrayIterator(valid_data, args.batch_size)
######################
# Model specification


def cifar_mean_subtract(x):
    bgr_mean = ng.persistent_tensor(axes=x.axes[0],
                                    initial_value=np.array([[104., 119.,
                                                             127.]]))
    y = ng.expand_dims((x - bgr_mean) / 255., ax.D, 1)
    return y


init_uni = UniformInit(-0.1, 0.1)
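cifar_mean_subtract is presumably plugged in as a Preprocess functor at the front of the network, as in Example #7; a hedged sketch of such a model head (the layer sizes and the Softmax activation are assumptions):

# Illustrative only -- not the model actually used in this example.
from ngraph.frontends.neon import Sequential, Preprocess, Affine, Rectlin, Softmax

seq1 = Sequential([
    Preprocess(functor=cifar_mean_subtract),
    Affine(nout=200, weight_init=init_uni, activation=Rectlin()),
    Affine(axes=ax.Y, weight_init=init_uni, activation=Softmax())
])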
Example #19
babi = BABI_Dialog(
    path=data_dir,
    task=args.task,
    oov=args.use_oov,
    use_match_type=args.use_match_type,
    cache_match_type=args.cache_match_type,
    cache_vectorized=args.cache_vectorized)

weight_saver = Saver()

# Set num iterations to 1 epoch since we loop over epochs & shuffle
ndata = babi.data_dict['train']['memory']['data'].shape[0]
num_iterations = ndata // args.batch_size

train_set = ArrayIterator(babi.data_dict['train'], batch_size=args.batch_size,
                          total_iterations=num_iterations)
dev_set = ArrayIterator(babi.data_dict['dev'], batch_size=args.batch_size)
test_set = ArrayIterator(babi.data_dict['test'], batch_size=args.batch_size)
inputs = train_set.make_placeholders()

memn2n = MemN2N_Dialog(
    babi.cands,
    babi.num_cands,
    babi.max_cand_len,
    babi.memory_size,
    babi.max_utt_len,
    babi.vocab_size,
    args.emb_size,
    args.batch_size,
    use_match_type=args.use_match_type,
    kb_ents_to_type=babi.kb_ents_to_type,
Example #20
# Generate Lissajous Curve
data = timeseries.TimeSeries(
    train_ratio=0.8,  # ratio of samples to set aside for training
    seq_len=seq_len,  # length of the sequence in each sample
    npoints=no_points,  # number of points to take in each cycle
    ncycles=no_cycles,  # number of cycles in the curve
    batch_size=batch_size,
    curvetype='Lissajous2',
    predict_seq=predict_seq,  # set True if you want sequences as output
    look_ahead=look_ahead)  # number of time steps to look ahead

# Build input data iterables
# Yields an input array of Shape (batch_size, seq_len, input_feature_dim)
num_iterations = no_epochs * no_batches
train_set = ArrayIterator(data.train,
                          batch_size,
                          total_iterations=num_iterations)
test_set = ArrayIterator(data.test, batch_size)

# Name and create axes
batch_axis = ng.make_axis(length=batch_size, name="N")
time_axis = ng.make_axis(length=seq_len, name="REC")
feature_axis = ng.make_axis(length=feature_dim, name="feature_axis")
out_axis = ng.make_axis(length=output_dim, name="output_axis")

in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
if predict_seq:
    out_axes = ng.make_axes([batch_axis, time_axis, out_axis])
else:
    out_axes = ng.make_axes([batch_axis, out_axis])
Example #21
params_dict['vocab_size'] = len(vocab_list)

print('Loading Embeddings')
embeddingz = np.load(os.path.join(path_gen + "glove.trimmed.300.npz"))
embeddings = embeddingz['glove']
vocab_file = os.path.join(path_gen + 'vocab.dat')

print("creating training Set ")
train = get_data_array_squad_ngraph(params_dict, data_train, set_val='train')
dev = get_data_array_squad_ngraph(params_dict, data_dev, set_val='dev')
print('Train Set Size is', len(train['para']['data']))
print('Dev set size is', len(dev['para']['data']))

# Use Array Iterator for training set
train_set = ArrayIterator(train,
                          batch_size=params_dict['batch_size'],
                          total_iterations=params_dict['num_iterations'])
# Use Array Iterator for validation set
valid_set = ArrayIterator(dev,
                          batch_size=params_dict['batch_size'],
                          total_iterations=params_dict['num_iterations'])

# Make placeholders for training
inputs = train_set.make_placeholders(include_iteration=True)

# Encoding Layer
rlayer_1 = LSTM(hidden_size,
                init,
                activation=Tanh(),
                reset_cells=True,
                gate_activation=Logistic(),