Example #1
def test_mnist(n_examples, num_hidden, epochs, learn_rate):
   """

   Example test case: 

   Load the training-set images and train an RBM, then use several
   test-set images to measure the reconstruction error on unseen
   data points. Finally, save the reconstructed images and the
   learned weights in the "Output" folder.

   """

   # load data
   images, labels = load_mnist(n_examples, training = True)

   # train one layer of RBM
   w, a, b  = rbm(images, num_hidden, learn_rate, epochs, batchsize = 100)

   # save all weights
   print("Saving weights...")
   save_weights(w, a, b, "Output", n_examples, num_hidden)
   
   # try to reconstruct some test set images
   print("Generating and saving the reconstructed images...")
   images, labels = load_mnist(10, training = False)
   for i in range(10):
      data = images[i]
      save_mnist_image(data, "Output", str(i) + "original.png")
      data = reconstruct(data, w, a, b)
      save_mnist_image(data, "Output", str(i) + "reconstructed.png")
   print("Done!")
Example #2
def test_mnist(n_examples, num_hidden, epochs, learn_rate, k):

    # load data
    images, labels = load_mnist(n_examples, training=True)

    # train one layer of RBM
    w, a, b = rbm(images, num_hidden, learn_rate, epochs, k, batchsize=30)

    # Load weights #NITESH
    # w = np.load('./Output_weights/w_v60000_h300.npy')
    # a = np.load('./Output_weights/a_v60000_h300.npy')
    # b = np.load('./Output_weights/b_v60000_h300.npy')
    # save all weights
    print("Saving weights...")
    save_weights(w, a, b, "Output", n_examples, num_hidden)

    # reconstruct the training images and collect their visible and hidden representations
    print("Generating and saving the reconstructed images...")
    samples = 60000
    images, labels = load_mnist(samples, training=True)
    visible_rep = np.zeros((samples, 784))
    visible_label = np.zeros((samples, 1))

    hidden_rep = np.zeros((samples, num_hidden))
    hidden_label = np.zeros((samples, 1))
    # leftover debug snippet: saves one sample image before the main loop
    i = 59001
    data = images[i]
    save_mnist_image(data, "Nitesh", str(i) + "original.png")
    for i in range(samples):
        data = images[i]
        save_mnist_image(data, "Output", str(i) + "original.png")
        data1 = reconstruct(data, w, a, b)
        visible_rep[i] = data1
        visible_label[i] = labels[i]
        data2 = sample_hidden(data, w, b)
        hidden_rep[i] = data2
        hidden_label[i] = labels[i]
        save_mnist_image(data1, "Output", str(i) + "reconstructed_visible.png")
        # NITESH
        #save_mnist_image(data2, "Output", str(i) + "reconstructed_hidden.png", hidden=True,num_hidden=num_hidden)
    print("Done!")

    #Nitesh
    # print("*******************************************")
    # print('Reconstructed Images {}'.format(data.shape)) # 1 bacth
    # print("*******************************************")
    np.savetxt(
        'visible/visible_representation_n_h_' + str(num_hidden) + '_k_' +
        str(k) + '.txt', visible_rep)
    np.savetxt(
        'visible/visible_representation_labels_n_h_' + str(num_hidden) +
        '_k_' + str(k) + '.txt', visible_label)

    np.savetxt(
        'hidden/hidden_representation_nh_' + str(num_hidden) + '_k_' + str(k) +
        '.txt', hidden_rep)
    np.savetxt(
        'hidden/hidden_representation_labels_nh_' + str(num_hidden) + '_k_' +
        str(k) + '.txt', hidden_label)
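The representations written with np.savetxt above can be reloaded later, e.g. for a downstream classifier. A minimal sketch, assuming hypothetical values num_hidden=300 and k=1 that match a previous run:

import numpy as np

hidden_rep = np.loadtxt('hidden/hidden_representation_nh_300_k_1.txt')
hidden_label = np.loadtxt('hidden/hidden_representation_labels_nh_300_k_1.txt')
# expected shapes for samples=60000: (60000, 300) and (60000,)
print(hidden_rep.shape, hidden_label.shape)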
Example #3
def classify_test(indi, pipeline, out_path):
    log.debug("Loading testing data")
    test_X, _ = load_mnist(None, with_y=False, path="data/mnist_test.csv")

    log.debug("Predicting")
    observed_y = pipeline.predict(test_X)

    log.debug("Received %s predictions" % (observed_y.shape))

    with open(out_path, "w") as f:
        for y in observed_y:
            f.write("%s\n" % y)

    log.debug("Done, saved into %s" % out_path)

    for test_X, observed_y in random.sample(list(zip(test_X, observed_y)), 100):
        print("Guessed", observed_y, "for")

        for row in chunks(test_X, 28):
            rstr = ""
            for col in row:
                rstr += str(int(col)).center(4)

            print(rstr)

        print("")
        print("")

    log.debug(indi)
Example #4
def load_dataset(num_samples):
    global X
    global y

    ############################
    # Load the initial dataset #
    ############################
    X, y = load_mnist(num_samples)

    return X, y
Example #5
                with open(savepath, 'wb') as f:
                    pickle.dump(random_paths, f)

                print(f'Saved {ind} videos')
                with open(dataset_path + f"/data_{batch_no}.pickle",
                          "wb") as f:
                    save_dict = {
                        "videos": videos.astype(np.int32),
                        "labels": labels,
                        "videos_digits": videos_digits.astype(np.int32),
                        "videos_digits_coords": videos_digits_coords,
                    }
                    pickle.dump(save_dict, f)

                videos = np.zeros((1000, FLAGS.video_frames, 64, 64))
                videos_digits = np.zeros((1000, FLAGS.max_digits_in_frame))
                videos_digits_coords = np.zeros(
                    (1000, FLAGS.video_frames, FLAGS.max_digits_in_frame, 2))

                batch_ind = 0
                batch_no += 1


if __name__ == "__main__":
    data = load_mnist()

    if FLAGS.split == "test":
        FLAGS.dataset_multiplier = 1

    make_video_sincron(data)
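For reference, a batch written by the loop above can be read back with pickle; a minimal sketch, assuming dataset_path points at the same directory and batch 0 exists:

import pickle

with open(dataset_path + "/data_0.pickle", "rb") as f:
    batch = pickle.load(f)
videos = batch["videos"]                # (1000, video_frames, 64, 64)
digits = batch["videos_digits"]         # (1000, max_digits_in_frame)
coords = batch["videos_digits_coords"]  # (1000, video_frames, max_digits_in_frame, 2)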
Example #6
def train(config_loader):
    """
    train main loop
    :param config_loader:
    :return:
    """
    # the ship class in cifar10 batch 1 contains 1025 images:
    # the first 1000 are used for training and the next 25 for testing
    train_image_num = 1000
    test_image_num = 25

    # start
    print("initial training progress....")

    # load data
    if "cifar" in config_loader.data_dir:
        print("load cifar data....")
        images, test_images = loader.load_cifar(
            config_loader.data_dir, train_image_num, test_image_num)
    elif "mnist" in config_loader.data_dir:
        print("load mnist data....")
        images, test_images = loader.load_mnist(
            config_loader.data_dir, train_image_num, test_image_num)
    else:
        print("neither cifar nor mnist data found...")
        return

    print("create mask list....")
    mask_list = create_mask_list()

    # checkpoint instance
    print("initial checkpoint....")
    checkpoint_prefix = os.path.join(config_loader.checkpoints_dir, "ckpt")

    # The call function of Generator and Discriminator have been decorated
    # with tf.contrib.eager.defun()
    # We get a performance speedup if defun is used (~25 seconds per epoch)
    print("initial encoder....")
    encoder = auto.Encoder()
    print("initial decoder....")
    decoder = auto.Decoder()

    # initial optimizer
    print("initial optimizer....")
    train_optimizer = tf.train.AdamOptimizer(2e-4, beta1=0.5)

    checkpoint = tf.train.Checkpoint(
        train_optimizer=train_optimizer, encoder=encoder, decoder=decoder)

    print("initial train log....")
    log_tool = train_tool.LogTool(
        config_loader.log_dir, config_loader.save_period)

    # restoring the latest checkpoint in checkpoint_dir if necessary
    if config_loader.load_latest_checkpoint is True:
        checkpoint.restore(tf.train.latest_checkpoint(
            config_loader.checkpoints_dir))
        print("load latest checkpoint....")

    
    def train_each_round(epoch):
        # initial input number
        image_num = 1
        # initial loss
        train_loss = 0
        # calculate image length
        image_len = images.shape[0]
        # each epoch, run all the images
        for i in range(image_len):
            # print("input_image {}".format(image_num))
            input_image = images[i:i + 1, :, :, :]
            # calculate input number
            image_num = image_num + 1

            with tf.GradientTape() as train_tape:
                # global train
                train_loss = global_train_iterator(input_image=input_image, mask_list=mask_list, train_tape=train_tape,
                                                   encoder=encoder, decoder=decoder, train_optimizer=train_optimizer)

        # save test result
        if epoch % config_loader.save_period == 0:
            rand = random.randint(0, test_images.shape[0] - 1)
            # get a random image
            input_image = test_images[rand:rand + 1, :, :, :]
            # show encoder output
            encoder_output = encoder(input_image, training=True)
            # crop the encoder output
            encoder_output = crop_image(encoder_output, mask_list)
            # decoder
            decoder_output = decoder(encoder_output, training=True)
            titles = ["IN", "EN", "DE"]
            image_list = [input_image, tf.reshape(
                encoder_output, [1, 128, 128, 3]), decoder_output]
            log_tool.save_image_list(image_list=image_list, title_list=titles)
            # evaluate in test data
            test_loss = evaluate_test_loss(test_images=test_images, image_num=test_image_num, encoder=encoder,
                                           decoder=decoder)
            # save loss and test loss in log file
            log_tool.save_loss(train_loss=train_loss, test_loss=test_loss)

    # start training
    foreach_training(log_tool=log_tool, checkpoint=checkpoint, checkpoint_prefix=checkpoint_prefix,
                     config_loader=config_loader, train_each_round=train_each_round)
Example #7
def one_hot(y):
    y_one_hot = np.zeros((len(y), 10))
    for i, label in enumerate(y):
        y_one_hot[i, label] = 1
    return y_one_hot


if __name__ == "__main__":

    output = 10  # output size
    input = 784  # input size

    import loader

    # mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
    train_x, train_y = loader.load_mnist('./mnist/', kind='train')
    test_x, test_y = loader.load_mnist('./mnist/', kind='t10k')
    # In this example, we limit mnist data
    train_y = one_hot(train_y)
    test_y = one_hot(test_y)
    Xtr, Ytr = train_x[:50000], train_y[:50000]  # 50000 for training (nn candidates)

    Xte, Yte = test_x[:5000], test_y[:5000]

    train_samples = []
    train_labels = []
    for i in range(len(Xtr)):
        train_samples.append(Xtr[i])
        train_labels.append(Ytr[i])
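The one_hot helper in this example can also be written without an explicit Python loop; an equivalent vectorized sketch using NumPy fancy indexing:

import numpy as np

def one_hot_vectorized(y):
    # row i of the 10x10 identity matrix is the one-hot encoding of class i
    return np.eye(10)[np.asarray(y, dtype=int)]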
Example #8
def train_type_2():
    FLAGS = get_args()
    if FLAGS.gan_type == 'infogan':
        gan_model = infoGAN
        print('**** InfoGAN ****')
    else:
        raise ValueError('Wrong GAN type!')

    save_path = os.path.join(SAVE_PATH, FLAGS.gan_type)
    save_path += '/'

    # load dataset
    if FLAGS.dataset == 'celeba':
        im_size = 32
        n_channels = 3
        n_continuous = 5
        n_discrete = 0
        cat_n_class_list = [10 for i in range(n_discrete)]
        max_grad_norm = 0.

        train_data = loader.load_celeba(FLAGS.bsize,
                                        data_path=CELEBA_PATH,
                                        rescale_size=im_size)
    else:
        im_size = 28
        n_channels = 1
        n_continuous = 4
        n_discrete = 1
        cat_n_class_list = [10]
        max_grad_norm = 10.

        train_data = loader.load_mnist(FLAGS.bsize, data_path=MNIST_PATH)

    train_model = gan_model(input_len=FLAGS.zlen,
                            im_size=im_size,
                            n_channels=n_channels,
                            cat_n_class_list=cat_n_class_list,
                            n_continuous=n_continuous,
                            n_discrete=n_discrete,
                            mutual_info_weight=FLAGS.w_mutual,
                            max_grad_norm=max_grad_norm)
    train_model.create_train_model()

    generate_model = gan_model(input_len=FLAGS.zlen,
                               im_size=im_size,
                               n_channels=n_channels,
                               cat_n_class_list=cat_n_class_list,
                               n_continuous=n_continuous,
                               n_discrete=n_discrete)
    generate_model.create_generate_model()

    sessconfig = tf.ConfigProto()
    sessconfig.gpu_options.allow_growth = True
    with tf.Session(config=sessconfig) as sess:
        writer = tf.summary.FileWriter(save_path)
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        writer.add_graph(sess.graph)
        for epoch_id in range(FLAGS.maxepoch):
            train_model.train_epoch(sess,
                                    train_data,
                                    init_lr=FLAGS.lr,
                                    n_g_train=FLAGS.ng,
                                    n_d_train=FLAGS.nd,
                                    keep_prob=FLAGS.keep_prob,
                                    summary_writer=writer)
            generate_model.generate_samples(sess,
                                            keep_prob=FLAGS.keep_prob,
                                            file_id=epoch_id,
                                            save_path=save_path)
            saver.save(
                sess, '{}gan-{}-epoch-{}'.format(save_path, FLAGS.gan_type,
                                                 epoch_id))
        saver.save(
            sess, '{}gan-{}-epoch-{}'.format(save_path, FLAGS.gan_type,
                                             epoch_id))
Example #9
def train_type_1():
    FLAGS = get_args()
    if FLAGS.gan_type == 'lsgan':
        gan_model = LSGAN
        print('**** LSGAN ****')
    elif FLAGS.gan_type == 'dcgan':
        gan_model = DCGAN
        print('**** DCGAN ****')
    else:
        raise ValueError('Wrong GAN type!')

    save_path = os.path.join(SAVE_PATH, FLAGS.gan_type)
    save_path += '/'

    # load dataset
    if FLAGS.dataset == 'celeba':
        train_data = loader.load_celeba(FLAGS.bsize, data_path=CELEBA_PATH)
        im_size = 64
        n_channels = 3
    else:
        train_data = loader.load_mnist(FLAGS.bsize, data_path=MNIST_PATH)
        im_size = 28
        n_channels = 1

    # init training model
    train_model = gan_model(input_len=FLAGS.zlen,
                            im_size=im_size,
                            n_channels=n_channels)
    train_model.create_train_model()

    # init generate model
    generate_model = gan_model(input_len=FLAGS.zlen,
                               im_size=im_size,
                               n_channels=n_channels)
    generate_model.create_generate_model()

    # create trainer
    trainer = Trainer(train_model,
                      train_data,
                      moniter_gradient=False,
                      init_lr=FLAGS.lr,
                      save_path=save_path)
    # create generator for sampling
    generator = Generator(generate_model,
                          keep_prob=FLAGS.keep_prob,
                          save_path=save_path)

    sessconfig = tf.ConfigProto()
    sessconfig.gpu_options.allow_growth = True
    with tf.Session(config=sessconfig) as sess:
        writer = tf.summary.FileWriter(save_path)
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        writer.add_graph(sess.graph)
        for epoch_id in range(FLAGS.maxepoch):
            trainer.train_epoch(sess,
                                keep_prob=FLAGS.keep_prob,
                                n_g_train=FLAGS.ng,
                                n_d_train=FLAGS.nd,
                                summary_writer=writer)
            generator.random_sampling(sess, plot_size=10, file_id=epoch_id)
            generator.viz_interpolate(sess, file_id=epoch_id)
            if FLAGS.zlen == 2:
                generator.viz_2D_manifold(sess, plot_size=20, file_id=epoch_id)

            saver.save(
                sess, '{}gan-{}-epoch-{}'.format(save_path, FLAGS.gan_type,
                                                 epoch_id))
        saver.save(
            sess, '{}gan-{}-epoch-{}'.format(save_path, FLAGS.gan_type,
                                             epoch_id))
Example #10
    f1 = sklearn.metrics.f1_score(test_y, observed_y)

    if display:
        print(sklearn.metrics.classification_report(test_y, observed_y))

    return round(f1 * 100., 2)


if __name__ == '__main__':
    pyvotune.set_debug(True)

    #############################
    ## Load the initial dataset #
    #############################
    X, y = load_mnist()

    print "Dataset loaded"

    print X.shape
    print y.shape

    # Split the dataset into training, testing and then validation parts
    train_X, temp_X, train_y, temp_y = train_test_split(X, y, test_size=0.25)

    print "Split"
    test_X, validate_X, test_y, validate_y = train_test_split(
        temp_X, temp_y, test_size=0.5)

    f = open(sys.argv[1], "rb")
    archive = pickle.load(f)
Example #11
    datab = data[indices[:size]]
    labelb = label[indices[:size]]

    ret_data_mix = []
    ret_label_mix = []
    # ret_data = []
    # ret_label = []

    for da, la, db, lb in zip(data, label, datab, labelb):
        if mix:
            lmbda = np.random.beta(alpha, alpha)
        else:
            lmbda = 0
        ret_data_mix.append(da * lmbda + db * (1 - lmbda))
        ret_label_mix.append(la * lmbda + lb * (1 - lmbda))  # the mixed label represents the component of '1'
        if len(ret_data_mix) == size:
            break

    # print('Data size is %d' % len(ret_data_mix))

    return np.array(ret_data_mix), np.array(ret_label_mix)


if __name__ == '__main__':
    from loader import load_mnist
    data, label, _, __ = load_mnist()
    mixup(data, label)
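For reference, the loop above applies the standard mixup rule:

x_mix = lmbda * x_a + (1 - lmbda) * x_b
y_mix = lmbda * y_a + (1 - lmbda) * y_b,  with lmbda ~ Beta(alpha, alpha)

when mix is enabled; with mix disabled, lmbda is fixed to 0, so each returned sample is simply the second (shuffled) sample.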
Example #12
            total_score += (result == labels[i])
        print("corrent rate: %.5f " % (100 * total_score / n), "%")

    def cross_entropy_loss(self, a, y):
        return np.sum(np.nan_to_num(-y * np.log(a) - (1 - y) * np.log(1 - a)))

    def log_likelihood_loss(self, a, y):
        return -np.dot(y, softmax(a).transpose())


def one_hot(y):
    y_one_hot = np.zeros((len(y), 10))
    for i, label in enumerate(y):
        y_one_hot[i, label] = 1
    return y_one_hot


if __name__ == '__main__':
    images, labels = loader.load_mnist("mnist/")
    onehotlabels = one_hot(labels)
    trainingData = np.hstack((images, onehotlabels))
    mlp = MLP([784, 300, 10])
    mlp.fit(trainingData)
    timg, tlab = loader.load_mnist("mnist/", kind="t10k")
    tlab = tlab[:, np.newaxis]
    print(len(timg), len(tlab))
    testData = np.hstack((timg, tlab))
    print(testData.shape)
    test = testData[:10000]
    mlp.predict(test)