Example #1
path_ckpt = "../trial/ckpt/kudo396"
path_emb = "../data/stance_emb.npy"
path_emb_sp = "../data/stance_emb_sample.npy"

path_vocab = "../trial/data/vocab.model"
path_data = "../data/stance.npz"

from model import tf, vAe
from util import partial
from util_io import load_txt
from util_np import np, partition, vpack
import util_sp as sp

# load test sentences
text = np.load(path_data)['text']

# load sentencepiece model
vocab = sp.load_spm(path_vocab)

# Load the model
model = vAe('infer')
# Restore the session
sess = tf.InteractiveSession()
tf.train.Saver().restore(sess, path_ckpt)

################################
# deterministic representation #
################################

# encode text with sentence piece model
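The snippet is truncated here. A minimal sketch of how the continuation might look, assuming the vAe exposes its latent mean as model.z and takes packed piece ids through model.tgt (both attribute names are guesses, not taken from the repository):

ids = [vocab.encode_as_ids(t) for t in text]                       # SentencePiece ids per sentence
ids = vpack(ids, (len(ids), max(map(len, ids))), vocab.eos_id(), np.int32)
emb = np.concatenate(                                              # batched deterministic encoding
    [sess.run(model.z, {model.tgt: ids[i:j]})
     for i, j in partition(len(ids), 128, discard=False)])
np.save(path_emb, emb)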
Example #2
from util import Record                      # Record, load_json and sample: module locations assumed
from util_io import load_json, load_txt
from util_np import np, sample, vpack
from util_sp import load_spm, encode_capped, encode_capped_sample_pair
from util_tf import tf, pipe

# A holds the parsed command-line arguments (config, seed, sample), defined elsewhere
config = load_json(A.config)
P = Record(config['paths'])
C = Record(config['model'])
T = Record(config['train'])

tf.set_random_seed(A.seed)

#############
# load data #
#############

vocab = load_spm(P.vocab)
valid = np.load(P.valid)


def batch(size=T.batch_train,
          path=P.train,
          vocab=vocab,
          seed=A.seed,
          kudo=A.sample,
          max_len=T.max_len):
    pac = lambda arrs: vpack(arrs, (size, max(map(len, arrs))), eos, np.int32)
    enc = encode_capped_sample_pair if kudo else encode_capped
    raw = tuple(load_txt(path))
    eos = vocab.eos_id()
    bat = []
    for i in sample(len(raw), seed):
        if size == len(bat):
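            # sketch of the cut-off remainder (not the repository's own code):
            # yield the packed batch when it is full, then start a new one; the
            # enc(vocab, raw[i], cap=max_len) call signature is an assumption
            yield pac(bat)
            bat = []
        bat.append(enc(vocab, raw[i], cap=max_len))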
Example #3
from trial import config as C, paths as P, train as T
from util import partial, comp, select
from util_io import pform, load_txt, save_txt
from util_np import np, partition, batch_sample
from util_sp import load_spm, encode, decode
from util_tf import tf, pipe
tf.set_random_seed(C.seed)

C.trial = 'm4_'

#############
# load data #
#############

# valid_en, train_en = np.load(pform(P.data, "valid_en.npy")), np.load(pform(P.data, "train_en.npy"))
valid_nl, train_nl = np.load(pform(P.data, "valid_nl.npy")), np.load(pform(P.data, "train_nl.npy"))
# valid_de, train_de = np.load(pform(P.data, "valid_de.npy")), np.load(pform(P.data, "train_de.npy"))
valid_da, train_da = np.load(pform(P.data, "valid_da.npy")), np.load(pform(P.data, "train_da.npy"))
# valid_sv, train_sv = np.load(pform(P.data, "valid_sv.npy")), np.load(pform(P.data, "train_sv.npy"))

train_nl = train_nl[:2**17].copy()
train_da = train_da[:2**17].copy()

data_index =        1,        3
data_valid = valid_nl, valid_da
data_train = train_nl, train_da

def batch(arrs, size= C.batch_train, seed= C.seed):
    size //= len(arrs) * (len(arrs) - 1)
    for i in batch_sample(len(arrs[0]), size, seed):
        yield tuple(arr[i] for arr in arrs)
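The divisor len(arrs) * (len(arrs) - 1) is the number of ordered language pairs built from each sampled batch, which keeps the total number of sentence pairs per step near C.batch_train. A hedged sketch (not the repository's code) of how those pairs could be laid out:

from itertools import permutations

def as_pairs(arrs):
    # every ordered (source, target) combination of the aligned language batches
    for s, t in permutations(range(len(arrs)), r=2):
        yield arrs[s], arrs[t]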
Example #4
from itertools import permutations
from tqdm import tqdm
from trial import config as C, paths as P, train as T
from util import partial, comp, select
from util_io import pform, load_txt, save_txt
from util_np import np, partition, batch_sample
from util_sp import load_spm, encode, decode
from util_tf import tf, pipe
tf.set_random_seed(C.seed)

C.trial = 'm1_'

#############
# load data #
#############

valid_en, train_en = np.load(pform(P.data, "valid_en.npy")), np.load(pform(P.data, "train_en.npy"))
# valid_nl, train_nl = np.load(pform(P.data, "valid_nl.npy")), np.load(pform(P.data, "train_nl.npy"))
valid_de, train_de = np.load(pform(P.data, "valid_de.npy")), np.load(pform(P.data, "train_de.npy"))
# valid_da, train_da = np.load(pform(P.data, "valid_da.npy")), np.load(pform(P.data, "train_da.npy"))
valid_sv, train_sv = np.load(pform(P.data, "valid_sv.npy")), np.load(pform(P.data, "train_sv.npy"))

data_index =        0,        2,        4
data_valid = valid_en, valid_de, valid_sv
data_train = train_en, train_de, train_sv

def batch(arrs, size= C.batch_train, seed= C.seed):
    size //= len(arrs) * (len(arrs) - 1)
    for i in batch_sample(len(arrs[0]), size, seed):
        yield tuple(arr[i] for arr in arrs)

perm = comp(tuple, partial(permutations, r= 2))
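Assuming comp composes right to left, perm materialises every ordered pair from its argument; for the three training languages:

# perm(data_index) == ((0, 2), (0, 4), (2, 0), (2, 4), (4, 0), (4, 2))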
Example #5
path_ckpt = "../trial/ckpt/kudo396"
path_emb = "../data/test_data_emb.npy"
path_emb_sp = "../data/test_data_emb_sample.npy"

path_vocab = "../trial/data/vocab.model"
path_data = "../data/test_data.npz"

from model import tf, vAe, encode, decode
from util import partial
from util_io import load_txt
from util_np import np, partition, vpack
import util_sp as sp

# load test sentences
text = np.load(path_data)["posts"]

# load sentencepiece model
vocab = sp.load_spm(path_vocab)

# Load the model
model = vAe('infer')
# Restore the session
sess = tf.InteractiveSession()
tf.train.Saver().restore(sess, path_ckpt)

################################
# deterministic representation #
################################

# encode text with sentence piece model
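As in Example #1, the snippet stops before the encoding step. The extra output path_emb_sp suggests a second, sampled representation; a minimal sketch of just that step, with model.mu and model.lv as assumed names for the posterior mean and log variance, and ids the packed piece-id matrix from the previous step:

mu, lv = sess.run((model.mu, model.lv), {model.tgt: ids})          # attribute names are assumptions
emb_sp = mu + np.exp(0.5 * lv) * np.random.RandomState(0).standard_normal(mu.shape)
np.save(path_emb_sp, emb_sp)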
Example #6
path_csv = "../docs/results_iac/clustering.csv"
path_emb = "../data/test_data_emb.npy"
path_emb_sp = "../data/test_data_emb_sample.npy"
# checkpoint and vocab paths as in the other examples of this trial
path_ckpt = "../trial/ckpt/kudo396"
path_vocab = "../trial/data/vocab.model"

from model import tf, vAe, encode, decode
from util import partial
from util_io import load_txt, save_txt
from util_np import np, partition, vpack
import pandas as pd
import util_sp as sp

# load data
df = pd.read_csv(path_csv)
emb = np.load(path_emb)
emb_sp = np.load(path_emb_sp)

# load sentencepiece model
vocab = sp.load_spm(path_vocab)

# Load the model
model = vAe('infer')
# Restore the session
sess = tf.InteractiveSession()
tf.train.Saver().restore(sess, path_ckpt)

###########################
# generate from centroids #
###########################
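The generation step itself is not shown. One plausible continuation, purely as a sketch: average the embeddings within each cluster and hand the centroids to the imported decode helper; the "label" column name and the decode call are assumptions about this repository.

labels = df["label"].to_numpy()                       # cluster assignment column: assumed name
centroids = np.stack([emb[labels == c].mean(axis=0)   # one mean embedding per cluster
                      for c in np.unique(labels)])
# decode(sess, model, centroids) would then map each centroid back through the decoder to text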
Example #7
from model import Transformer
from os.path import expanduser, join
from tqdm import tqdm
from util import PointedIndex
from util_io import decode
from util_np import np, permute
from util_tf import tf, batch

logdir = expanduser("~/cache/tensorboard-logdir/explicharr")
tf.set_random_seed(0)

###############
# preparation #
###############

src_train = np.load("trial/data/train_src.npy")
tgt_train = np.load("trial/data/train_tgt.npy")
src_valid = np.load("trial/data/valid_src.npy")
tgt_valid = np.load("trial/data/valid_tgt.npy")
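# len_cap and batch_size are hyperparameters defined earlier in the original script (not shown here)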
assert src_train.shape[1] <= len_cap
assert tgt_train.shape[1] <= len_cap
assert src_valid.shape[1] <= len_cap
assert tgt_valid.shape[1] <= len_cap
epoch = len(src_train) // batch_size

# # for profiling
# from util_tf import profile
# m = Transformer.new().data()
# forcing = m.forcing(trainable= False)
# autoreg = m.autoreg(trainable= False)
# feed = {m.src_: src_train[:batch_size], m.tgt_: tgt_train[:batch_size]}
Example #8
from model import Transformer
from os.path import expanduser, join
from tqdm import tqdm
from util import comp
from util_io import path, save
from util_np import np, vpack, c2r
from util_tf import tf, batch

logdir = expanduser("~/cache/tensorboard-logdir/i-synth")
tf.set_random_seed(0)

###############
# preparation #
###############

index = np.load("trial/data/index.npy").item()
texts = np.load("trial/data/texts.npy")
names = np.load("trial/data/names.npy")
epoch, split = divmod(len(texts), batch_size)
print("{} batches of {} training instances, {} validation".format(
    epoch, batch_size, split))


def load_batch(names, load=comp(np.load, "trial/data/grams/{}.npy".format)):
    names = names.astype(np.str)
    x = vpack(map(load, names), complex('(nan+nanj)'), 1, 1)
    # x = vpack(map(comp(load, path), names), complex('(nan+nanj)'), 1, 1)
    x[:, 0] = 0j
    x = c2r(x)
    _, t, d = x.shape
    assert t <= len_cap
Example #9
# imports reconstructed for this snippet (module locations are partly assumptions);
# the project-local helpers resize_images, unison_shfl, spread_image and batch2 are
# imported elsewhere in the original script
from model import MG_GAN
from numpy.random import RandomState
from tqdm import tqdm
from util_io import pform
from util_np import np, partition
from util_tf import tf, pipe
import os

def train(anomaly_class=8,
          dataset="cifar",
          n_dis=1,
          epochs=25,
          dim_btlnk=32,
          batch_size=64,
          loss="mean",
          context_weight=1,
          dim_d=64,
          dim_g=64,
          extra_layers=0,
          gpu="0"):

    #set gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    path_log = f"/cache/tensorboard-logdir/{dataset}"
    path_ckpt = "/project/multi-discriminator-gan/ckpt"
    path_data = "/project/multi-discriminator-gan/data"

    #reset graphs and fix seeds
    tf.reset_default_graph()
    if 'sess' in globals(): sess.close()
    rand = RandomState(0)
    tf.set_random_seed(0)

    #load data
    if dataset == "ucsd1":
        x_train = np.load("./data/ucsd1_train_x.npz")["arr_0"] / 255
        y_train = np.load("./data/ucsd1_train_y.npz")["arr_0"]
        x_test = np.load("./data/ucsd1_test_x.npz")["arr_0"] / 255
        y_test = np.load("./data/ucsd1_test_y.npz")["arr_0"]

    elif dataset == "uscd2":
        x_train = np.load("./data/ucsd2_train_x.npz")["arr_0"]
        y_train = np.load("./data/ucsd2_train_y.npz")["arr_0"]
        x_test = np.load("./data/ucsd2_test_x.npz")["arr_0"]
        y_test = np.load("./data/ucsd2_test_y.npz")["arr_0"]

    else:
        if dataset == "mnist":
            (train_images, train_labels), (
                test_images,
                test_labels) = tf.keras.datasets.mnist.load_data()
            train_images = resize_images(train_images)
            test_images = resize_images(test_images)
        else:
            (train_images, train_labels), (
                test_images,
                test_labels) = tf.keras.datasets.cifar10.load_data()
            train_labels = np.reshape(train_labels, len(train_labels))
            test_labels = np.reshape(test_labels, len(test_labels))

        inlier = train_images[train_labels != anomaly_class]
        #data_size = prod(inlier[0].sha
        x_train = inlier / 255
        #x_train = np.reshape(inlier, (len(inlier), data_size))/255
        #y_train = train_labels[train_labels!=anomaly_class]
        y_train = np.zeros(len(x_train), dtype=np.int8)  # dummy
        outlier = train_images[train_labels == anomaly_class]
        x_test = np.concatenate([outlier, test_images]) / 255
        #x_test = np.reshape(np.concatenate([outlier, test_images])
        #                    ,(len(outlier)+len(test_images), data_size))/255
        y_test = np.concatenate(
            [train_labels[train_labels == anomaly_class], test_labels])
        y_test = [0 if y != anomaly_class else 1 for y in y_test]
        x_test, y_test = unison_shfl(x_test, np.array(y_test))

    img_size_x = x_train[0].shape[0]
    img_size_y = x_train[0].shape[1]
    channel = x_train[0].shape[-1]
    trial = f"{dataset}_{loss}_dis{n_dis}_{anomaly_class}_w{context_weight}_btlnk{dim_btlnk}_d{dim_d}_g{dim_g}e{extra_layers}"

    # data pipeline
    batch_fn = lambda: batch2(x_train, y_train, batch_size)
    x, y = pipe(batch_fn, (tf.float32, tf.float32), prefetch=4)
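    # pipe presumably wraps the endless batch generator in a tf.data pipeline, prefetching 4 batches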
    #z = tf.random_normal((batch_size, z_dim))

    # load graph
    mg_gan = MG_GAN.new(img_size_x,
                        channel,
                        dim_btlnk,
                        dim_d,
                        dim_g,
                        n_dis,
                        extra_layers=extra_layers)
    model = MG_GAN.build(mg_gan, x, y, context_weight, loss)

    # start session, initialize variables

    sess = tf.InteractiveSession()
    saver = tf.train.Saver()

    wrtr = tf.summary.FileWriter(pform(path_log, trial))
    wrtr.add_graph(sess.graph)

    ### if load pretrained model
    # pretrain = "modelname"
    #saver.restore(sess, pform(path_ckpt, pretrain))
    ### else:
    auc_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope='AUC')
    init = tf.group(tf.global_variables_initializer(),
                    tf.variables_initializer(var_list=auc_vars))
    sess.run(init)

    #if "ucsd" in dataset:
    summary_test = tf.summary.merge([
        tf.summary.scalar('g_loss', model.g_loss),
        tf.summary.scalar("lambda", model.lam),
        tf.summary.scalar("gl_rec", model.gl_rec),
        tf.summary.scalar("gl_adv", model.gl_adv),
        tf.summary.scalar("gl_lam", model.gl_lam),
        tf.summary.scalar('d_loss_mean', model.d_loss_mean),
        tf.summary.scalar('d_max', model.d_max)
        #, tf.summary.scalar('d_loss', model.d_loss)
        ,
        tf.summary.scalar("AUC_gx", model.auc_gx)
    ])
    if dataset == "ucsd1":
        summary_images = tf.summary.merge(
            (tf.summary.image("gx", model.gx, max_outputs=8),
             tf.summary.image("x", model.x, max_outputs=8),
             tf.summary.image(
                 'gx400',
                 spread_image(tf.concat([model.gx, model.x], axis=1), 8, 2,
                              img_size_x, img_size_y, channel))))
    else:
        summary_images = tf.summary.merge(
            (tf.summary.image("gx", model.gx, max_outputs=8),
             tf.summary.image(
                 'gx400',
                 spread_image(model.gx[:400], 20, 20, img_size_x, img_size_y,
                              channel)),
             tf.summary.image("x", model.x, max_outputs=8)))

    if n_dis > 1:
        d_wrtr = {
            i: tf.summary.FileWriter(pform(path_log, trial + f"d{i}"))
            for i in range(n_dis)
        }
        summary_discr = {
            i: tf.summary.scalar('d_loss_multi', model.d_loss[i])
            for i in range(n_dis)
        }

    def summ(step):
        fetches = model.g_loss, model.lam, model.d_loss_mean, model.auc_gx
        results = map(
            np.mean,
            zip(*(sess.run(fetches, {
                model['x']: x_test[i:j],
                model['y']: y_test[i:j]
            }) for i, j in partition(len(x_test), batch_size, discard=False))))
        results = list(results)
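        # feed the batch-averaged values back in place of the fetched tensors so a
        # single summary pass covers the whole test set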
        wrtr.add_summary(sess.run(summary_test, dict(zip(fetches, results))),
                         step)

        if dataset == "ucsd1":
            # bike, skateboard, grasswalk, shopping cart, car, normal, normal, grass
            wrtr.add_summary(
                sess.run(
                    summary_images, {
                        model.x:
                        x_test[[990, 1851, 2140, 2500, 2780, 2880, 3380, 3580]]
                    }), step)
        else:
            wrtr.add_summary(sess.run(summary_images, {model.x: x_test}), step)
        wrtr.flush()

    def summ_discr(step):
        fetches = model.d_loss
        results = map(
            np.mean,
            zip(*(sess.run(fetches, {
                model['x']: x_test[i:j],
                model['y']: y_test[i:j]
            }) for i, j in partition(len(x_test), batch_size, discard=False))))
        results = list(results)
        if n_dis > 1:  # put all losses of the discriminators in one plot
            for i in range(n_dis):
                d_wrtr[i].add_summary(
                    sess.run(summary_discr[i], dict(zip(fetches, results))),
                    step)
                #d_wrtr[i].add_summary(sess.run(summary_discr[i], dict([(fetches[i], results[i])])), step)
                d_wrtr[i].flush()

    #def log(step
    #        , wrtr= wrtr
    #        , log = tf.summary.merge([tf.summary.scalar('g_loss', model.g_loss)
    #                                  , tf.summary.scalar('d_loss', tf.reduce_mean(model.d_loss))
    #                                  , tf.summary.scalar("lambda", model.lam)
    #                                  , tf.summary.image("gx", model.gx, max_outputs=5)
    #                                  , tf.summary.image('gx400', spread_image(model.gx[:400], 20,20, img_size, img_size, channel))
    #                                  #, tf.summary.scalar("AUC_dgx", model.auc_dgx)
    #                                  #, tf.summary.scalar("AUC_dx", model.auc_dx)
    #                                  , tf.summary.scalar("AUC_gx", model.auc_gx)])
    #        , y= y_test
    #        , x= x_test):
    #    wrtr.add_summary(sess.run(log, {model["x"]:x
    #                                    , model["y"]:y})
    #                     , step)
    #    wrtr.flush()

    steps_per_epoch = len(x_train) // batch_size - 1
    for epoch in tqdm(range(epochs)):
        for i in range(steps_per_epoch):
            #sess.run(model["train_step"])
            sess.run(model['d_step'])
            sess.run(model['g_step'])
        # tensorboard writer
        #if "ucsd" in dataset:
        summ(sess.run(model["step"]) // steps_per_epoch)
        #else:
        #    log(sess.run(model["step"])//steps_per_epoch)
        if n_dis > 1:
            summ_discr(sess.run(model["step"]) // steps_per_epoch)

    saver.save(sess, pform(path_ckpt, trial), write_meta_graph=False)