from random import randint

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
from scipy.misc import imsave
from tqdm import trange

# HuzzerSource, CharSplitter, TokenDatasource, OneHotVecotorizer(ASCII), the
# build_* graph builders, conv_arg_scope*, NAMES, TOKEN_MAP and BASEDIR are
# project-local and assumed to be imported from this repo's own modules.


def make_special_conv4_l1(latent_dim, filter_length=3, num_filters=256):
    x_shape = (128, 54)
    huzz = HuzzerSource()
    data_pipeline = OneHotVecotorizer(TokenDatasource(huzz), x_shape[1], x_shape[0])

    decoder_input = tf.placeholder(tf.float32, shape=(1, latent_dim), name='decoder_input')
    encoder_input = tf.placeholder(tf.float32, shape=(1, *x_shape), name='encoder_input')
    with conv_arg_scope2():
        encoder_output, _, dense_layer_size = build_special_conv4_encoder(
            encoder_input, latent_dim, num_filters, filter_length=filter_length)
        decoder_output = build_special_conv4_decoder(
            decoder_input, x_shape, num_filters,
            filter_length=filter_length, dense_layer_size=dense_layer_size)
    decoder_output = tf.nn.softmax(decoder_output, dim=-1)
    decoder_output = tf.squeeze(decoder_output, 0)

    return data_pipeline, encoder_input, encoder_output, decoder_input, decoder_output
def test_char_splitter():
    char_splitter = CharSplitter(HuzzerSource())

    # Check determinism: the same key must always yield the same value.
    for i in range(20):
        x = char_splitter['{0}/{0}'.format(i)]
        y = char_splitter['{0}/{0}'.format(i)]
        assert x == y, 'Accessing the same element produces different outcomes.'
def test_huzzer():
    """Test determinism."""
    huzz = HuzzerSource()
    for i in range(20):
        x = huzz[str(i)]
        y = huzz[str(i)]
        assert x == y, 'Accessing the same element produces different outcomes.'
def test_one_hot():
    expected_shape = (256, 54)
    one_hotter = OneHotVecotorizer(TokenDatasource(HuzzerSource()), 54, 256)

    # Check determinism and dimensions.
    for i in range(20, 60):
        x = one_hotter[str(i)]
        y = one_hotter[str(i)]
        assert np.array_equal(x, y), 'Accessing the same element produces different outcomes.'
        assert x.shape == expected_shape, 'Incorrect shape for output.'
def test_one_hot_ascii():
    expected_length = 10
    one_hotter = OneHotVecotorizerASCII(
        CharSplitter(HuzzerSource()), total_string_length=expected_length)

    # Check determinism and dimensions.
    for i in range(20, 60):
        x = one_hotter['{0}/{0}'.format(i)]
        y = one_hotter['{0}/{0}'.format(i)]
        assert np.array_equal(x, y), 'Accessing the same element produces different outcomes.'
        assert x.shape == (expected_length, 128), 'Incorrect shape for output.'
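# The tests above all check the same contract: a source is addressed by a
# string key, and the same key must always map to the same sample. A minimal
# sketch of one way to honour that contract, using a hypothetical KeyedSource
# (the real HuzzerSource / CharSplitter implementations live elsewhere in this
# repo): derive the PRNG seed from the key itself.

import hashlib
import random


class KeyedSource:
    """Deterministic key -> sample mapping: the seed is a hash of the key."""

    def __getitem__(self, key):
        seed = int(hashlib.sha256(key.encode('utf-8')).hexdigest(), 16)
        rng = random.Random(seed)
        # Stand-in for real sample generation (e.g. a random Haskell program).
        return [rng.randint(0, 53) for _ in range(10)]


# Accessing the same key twice yields identical output, as the tests require:
assert KeyedSource()['5/5'] == KeyedSource()['5/5']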
def make_conv1():
    latent_dim = 16
    x_shape = (128, 54)
    huzz = HuzzerSource()
    data_pipeline = OneHotVecotorizer(TokenDatasource(huzz), x_shape[1], x_shape[0])

    decoder_input = tf.placeholder(tf.float32, shape=(1, latent_dim), name='decoder_input')
    with conv_arg_scope():
        encoder_input = tf.placeholder(tf.float32, shape=(1, *x_shape), name='encoder_input')
        encoder_output, _ = build_conv1_encoder(encoder_input, latent_dim)
        decoder_output = build_conv1_decoder(decoder_input, x_shape)
        decoder_output = tf.reshape(decoder_output, x_shape)

    return data_pipeline, encoder_input, encoder_output, decoder_input, decoder_output
def make_simple(latent_dim, sequence_length):
    x_shape = (sequence_length, 54)
    huzz = HuzzerSource()
    data_pipeline = OneHotVecotorizer(TokenDatasource(huzz), x_shape[1], x_shape[0])

    encoder_input = tf.placeholder(tf.float32, shape=(1, *x_shape), name='encoder_input')
    x_flat = slim.flatten(encoder_input)
    z = slim.fully_connected(
        x_flat, latent_dim, scope='encoder_output', activation_fn=tf.tanh)
    encoder_output = tf.identity(z, 'this_is_output')

    decoder_input = tf.placeholder(tf.float32, shape=(1, latent_dim), name='decoder_input')
    decoder_output = build_decoder(decoder_input, x_shape)

    return data_pipeline, encoder_input, encoder_output, decoder_input, decoder_output
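# Note the tf.identity(z, 'this_is_output') tag above: it gives the latent
# tensor a stable graph name, so it can later be fetched without holding a
# Python reference to it (the pattern autoencode() below relies on), e.g.:
#
#     latent = sess.run('this_is_output:0', feed_dict={encoder_input: example})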
def make_special_conv():
    latent_dim = 64
    x_shape = (128, 54)
    huzz = HuzzerSource()
    data_pipeline = OneHotVecotorizer(TokenDatasource(huzz), x_shape[1], x_shape[0])

    decoder_input = tf.placeholder(tf.float32, shape=(1, latent_dim), name='decoder_input')
    encoder_input = tf.placeholder(tf.float32, shape=(1, *x_shape), name='encoder_input')
    with conv_arg_scope():
        encoder_output, _ = build_special_conv_encoder(encoder_input, latent_dim)
        decoder_output = build_special_conv_decoder(decoder_input, x_shape)
    decoder_output = tf.nn.softmax(decoder_output, dim=-1)
    decoder_output = tf.squeeze(decoder_output, 0)

    return data_pipeline, encoder_input, encoder_output, decoder_input, decoder_output
def make_simple_sss(latent_dim=32):
    x_shape = (128, 54)
    huzz = HuzzerSource()
    data_pipeline = OneHotVecotorizer(TokenDatasource(huzz), x_shape[1], x_shape[0])

    encoder_input = tf.placeholder(tf.float32, shape=(1, *x_shape), name='encoder_input')
    x_flat = slim.flatten(encoder_input)
    z = slim.fully_connected(
        x_flat, latent_dim, scope='encoder_output', activation_fn=tf.tanh)

    decoder_input = tf.placeholder(tf.float32, shape=(1, latent_dim), name='decoder_input')
    decoder_output = build_decoder(decoder_input, x_shape, activation=tf.nn.relu6)
    decoder_output = tf.reshape(decoder_output, x_shape)

    return data_pipeline, encoder_input, z, decoder_input, decoder_output
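# All of the make_* constructors above return the same five-tuple
# (data_pipeline, encoder_input, encoder_output, decoder_input,
# decoder_output), so they can be wired up interchangeably. A minimal
# round-trip sketch with a hypothetical demo helper, assuming a freshly
# initialised (untrained) graph; in practice the variables would be restored
# from a checkpoint first:

def demo_roundtrip():
    pipeline, enc_in, enc_out, dec_in, dec_out = make_simple_sss(latent_dim=32)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        example = pipeline['42']                      # one-hot array, shape (128, 54)
        example = np.reshape(example, (1, *example.shape))
        latent = sess.run(enc_out, feed_dict={enc_in: example})   # shape (1, 32)
        recon = sess.run(dec_out, feed_dict={dec_in: latent})     # shape (128, 54)
        return recon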
def autoencode():
    huzz = HuzzerSource()
    data_pipeline = OneHotVecotorizer(TokenDatasource(huzz), 54, 256)
    x_shape = (256, 54)
    latent_dim = 16

    # Rebuild the graph that was trained in the 'simple' experiment.
    x = tf.placeholder(tf.float32, shape=(1, *x_shape), name='encoder_input')
    x_flat = slim.flatten(x)
    z = slim.fully_connected(
        x_flat, latent_dim, scope='encoder_output', activation_fn=tf.tanh)
    z = tf.identity(z, 'this_is_output')
    decoder_input = tf.placeholder(tf.float32, shape=(1, latent_dim), name='decoder_input')
    build_decoder(decoder_input, x_shape)

    def token_to_string(t):
        if t == 0:
            return ''
        return TOKEN_MAP[t - 1]

    with tf.Session() as sess:
        saver = tf.train.Saver()
        saver.restore(
            sess, tf.train.latest_checkpoint('experiments/VAE_baseline/simple'))

        def encode(example_data=None):
            if example_data is None:
                key = str(randint(0, 100000000))
                code = huzz[key]
                print('INPUT CODE FOR NETWORK:')
                print(code)
                example_data = data_pipeline[key]
                example_data = np.reshape(example_data, (1, *example_data.shape))
            return sess.run(
                ['this_is_output:0'],
                feed_dict={NAMES['encoder_input']: example_data})

        def decode(latent_rep):
            return sess.run(
                NAMES['decoder_output'],
                feed_dict={NAMES['decoder_input']: latent_rep[0]})

        for i in trange(5):
            key = str(randint(0, 100000000))
            code = huzz[key]
            with open(BASEDIR + 'simple_examples/auto_{}_input.hs'.format(i), 'w') as f:
                f.write(code)

            example_data = data_pipeline[key]
            imsave(BASEDIR + 'simple_examples/auto_{}_input.png'.format(i),
                   example_data.astype(np.float32).T)
            example_data = np.reshape(example_data, (1, *example_data.shape))

            latent_reps = encode(example_data)
            recon = decode(latent_reps)[0]
            imsave(BASEDIR + 'simple_examples/auto_{}_output.png'.format(i), recon.T)

            tokens = np.argmax(recon, axis=-1)
            text = ' '.join(token_to_string(t) for t in tokens)
            with open(BASEDIR + 'simple_examples/auto_{}_output.hs'.format(i), 'w') as f:
                f.write(text)
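# autoencode() restores the most recent 'simple' checkpoint and writes five
# input/output pairs (.hs sources plus .png one-hot visualisations) under
# BASEDIR + 'simple_examples/'. A typical invocation, assuming this module is
# run as a script:

if __name__ == '__main__':
    autoencode()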