Example #1
    def fit(self, X, y):
        """fit the model to data matrix X and target y"""
        mlp_hl_size = 50
        num_classes = np.unique(y).size
        inputs = X
        bias_X = self.add_bias(X)

        # unsupervised training (training auto encoders)
        for i, layer_size in enumerate(self.hidden_layer_sizes):
            auto_encoder = AutoEncoder(hidden_layer_size=layer_size)
            auto_encoder.fit(inputs)
            weights = auto_encoder.get_coefs()
            self.coefs_.append(weights[0])
            inputs = self.forward(bias_X, self.coefs_)  # no bias here

        # supervised training using MLP classifier
        mlp = MLPClassifier()
        mlp.fit(inputs, y)
        #print "MLP Score:", mlp.score(inputs, y)

        mlp_coefs = []
        for i, coefs in enumerate(mlp.coefs_):
            new_coefs = np.vstack((coefs, mlp.intercepts_[i]))
            self.coefs_.append(new_coefs)
        """
Example #2
def train_encoder():
    from AutoEncoder import AutoEncoder

    device = torch.device("cuda")
    enc = AutoEncoder(3).to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(enc.parameters(), lr=ETA)

    loader = get_cifar10()

    for e in range(EPOCHS):
        train_loss = 0.0

        for images, _ in loader:
            images = images.to(device)

            _, decoded = enc(images)
            assert (decoded.size() == images.size())
            loss = criterion(decoded, images)
            train_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        train_loss /= len(loader)
        print(f"Epochs {e+1}/{EPOCHS}")
        print(f"Loss: {train_loss:.8f}")

    enc.save("ckpts/encoder_test.pth")
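get_cifar10, ETA, and EPOCHS are assumed to be defined elsewhere in the module. A minimal sketch of a compatible get_cifar10 (an assumption; the original loader is not shown), using torchvision's CIFAR-10 and a hypothetical BATCH_SIZE constant:

import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

BATCH_SIZE = 128  # hypothetical; the original value is not shown

def get_cifar10():
    # CIFAR-10 training images as tensors in [0, 1]; the autoencoder reconstructs them directly
    dataset = torchvision.datasets.CIFAR10(root="./data", train=True, download=True,
                                           transform=transforms.ToTensor())
    return DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)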
Example #3
    def __init__(self):
        from keras.models import Model
        from keras.layers import Input
        import numpy

        self.auto_loss = 5
        self.cross_loss = 1

        self.a = a = AutoEncoder('ukiyoe')
        self.a.dataset = DataLoader('x2photo/train/ukiyoe',
                                    (a.width, a.height))
        self.b = b = AutoEncoder('photo')
        self.b.dataset = DataLoader('x2photo/train/photo', (b.width, b.height))

        a.discriminator.compile(optimizer='rmsprop',
                                loss='mse',
                                metrics=['accuracy'])
        b.discriminator.compile(optimizer='rmsprop',
                                loss='mse',
                                metrics=['accuracy'])

        a.autoencoder.compile(optimizer='rmsprop',
                              loss='mse',
                              metrics=['accuracy'],
                              loss_weights=[self.auto_loss])
        b.autoencoder.compile(optimizer='rmsprop',
                              loss='mse',
                              metrics=['accuracy'],
                              loss_weights=[self.auto_loss])

        fack_a = a.decoder(b.z)
        fack_b = b.decoder(a.z)

        a.discriminator.trainable = False
        b.discriminator.trainable = False

        da = a.discriminator(fack_a)
        db = b.discriminator(fack_b)

        cross_ab = Model(a.i, db)
        cross_ba = Model(b.i, da)
        cross_ab.compile(optimizer='rmsprop',
                         loss='mse',
                         metrics=['accuracy'],
                         loss_weights=[self.cross_loss])
        cross_ba.compile(optimizer='rmsprop',
                         loss='mse',
                         metrics=['accuracy'],
                         loss_weights=[self.cross_loss])

        class Models:
            pass

        self.models = models = Models()
        models.gab = Model(a.i, fack_b)
        models.gba = Model(b.i, fack_a)
        models.cross_ab = cross_ab
        models.cross_ba = cross_ba
Example #4
    def grow(self, bmu):
        # type: (object) -> object
        p = self.grid[bmu]
        up = p + np.array([0, +1])
        right = p + np.array([+1, 0])
        down = p + np.array([0, -1])
        left = p + np.array([-1, 0])

        neighbors = np.array([up, right, down, left])
        direction = 0
        for nei in neighbors:
            try:
                self.errors[str(list(nei))] += self.errors[bmu] * self.fd
            except KeyError:
                try:
                    w1, w2, b1, b2 = self.type_b(nei, direction)
                    if np.isnan(w1).any():
                        print 'shit'
                except:
                    try:
                        w1, w2, b1, b2 = self.type_a(nei, direction)
                        if np.isnan(w1).any():
                            print 'shit'
                    except:
                        try:
                            w1, w2, b1, b2 = self.type_c(nei, direction)
                            if np.isnan(w1).any():
                                print 'shit'

                        except:
                            w1 = None
                            w2 = None#np.ones((self.hid, self.dims))
                            b1 = None#np.ones(self.hid)
                            b2 = None#np.ones(self.dims)

                #     if new_a.any():
                #         if newf_c.any():
                #             # w.fill(0.5)
                #         else:
                #             w = new_c
                #     else:
                #         w = new_a
                # else:
                #     w = new_b

                AE = AutoEncoder(self.dims, self.hid, self.s1, self.m1)
                # if np.isnan(w1).any():
                #     print 'shit'
                AE.set_params(w1, w2, b1, b2)

                self.learners[str(list(nei))] = AE
                self.grid[str(list(nei))] = list(nei)
                self.errors[str(list(nei))] = self.GT/2
                self.gen[str(list(nei))] = self.current_gen
                self.hits[str(list(nei))] = 0
            direction += 1
        self.errors[bmu] = self.GT / 2
Example #5
 def build(self, data):
     i = 0
     self.ae = AutoEncoder()
     self.ae.init_tf(fin=self.attr_sample_size,
                     fou=1,
                     epochs=10,
                     l_rate=0.01)
     while i < self.forest_size_limit:
         sample_data = numpy.array(random.sample(data, self.sample_size))
         tree = self.build_tree(sample_data, 0)
         self.forest.append(tree)
         i += 1
Example #6
    def __init__(self):
        super(StackedAutoEncoder, self).__init__()

        # stack two autoencoders
        self.encoder1 = AutoEncoder(1)
        self.encoder2 = AutoEncoder(32)

        self.encoders = [
            self.encoder1,
            self.encoder2,
        ]
Example #7
    def __init__(self, env, network, buffer, epsilon=0.05, batch_size=32):

        self.ae = AutoEncoder(25)
        self.ae.load_state_dict(torch.load('lunar_models/code25.pt', map_location=torch.device('cpu')))
        self.env = env
        self.network = network
        self.target_network = deepcopy(network)
        self.buffer = buffer
        self.epsilon = epsilon
        self.batch_size = batch_size
        self.window = 100
        self.reward_threshold = 195  # Avg reward before CartPole is "solved"
        self.initialize()
Example #8
 def __init__(self, gpu_mode):
     indexFile = config.LSFModelInfo['model_dict'] + config.LSFModelInfo[
         'indexFile']
     ntrees = config.LSFModelInfo['nLSFModelTrees']
     feature_dim_used = config.CNNInfo['feature_size']
     model_dict = config.LSFModelInfo['model_dict']
     net = config.CNNInfo['net']
     layer = config.CNNInfo['layer']
     train_path = config.CNNInfo['input_dict']
     if not os.path.exists(model_dict):
         os.makedirs(model_dict)
     logging.info('Loading net and associated files...')
     if config.AutoEncoderInfo['enable_autoencoder']:
         # Hack: TensorFlow may not work with Caffe otherwise. Initialize the
         # TensorFlow session before Caffe.
         gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
         sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
     self.cnn_activation = CnnActivation(
         DeepDescriptor(layer, net, gpu=gpu_mode), model_dict,
         feature_dim_used, train_path, indexFile)
     if config.AutoEncoderInfo['enable_autoencoder']:
         self.autoencoder = AutoEncoder(config.AutoEncoderInfo['n_layers'],
                                        feature_dim_used)
         self.cnn_activation.runAutoEncoder(self.autoencoder)
     self.cnn_activation.EncodeTextureLSFTree(
         ntrees, config.AutoEncoderInfo['enable_autoencoder'])
Example #9
 def __init__(self, opts):
     self._options = opts
     self.model_name = opts.model_name + '_' + opts.tag
     self.log_file = os.path.join(opts.log_dir, self.model_name+'_{}.txt'.format(
         time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time()))))
     self.save_folder = os.path.join(opts.save_dir, self.model_name) \
         if opts.save_folder is None else opts.save_folder
     self.util_folder = os.path.join(opts.util_dir, self.model_name) \
         if opts.util_folder is None else opts.util_folder
     if opts.is_training:
         if os.path.exists(self.log_file):
             del_cmd = input('[Warning][LogFile {} exists][Delete it?]'.format(self.log_file))
             if del_cmd:
                 os.remove(self.log_file)
         if os.path.exists(self.save_folder):
             del_cmd = bool(eval(input('[Warning][SaveFile {} exists][Delete it?]'.format(self.save_folder))))
             if del_cmd:
                 shutil.rmtree(self.save_folder)
         os.mkdir(self.save_folder)
         if os.path.exists(self.util_folder):
             del_cmd = bool(eval(input('[Warning][UtilFile {} exists][Delete it?]'.format(self.util_folder))))
             if del_cmd:
                 shutil.rmtree(self.util_folder)
         os.mkdir(self.util_folder)
     self.streamer = DataStream(opts)
     if torch.cuda.is_available():
         torch.set_default_tensor_type('torch.cuda.FloatTensor')
     self.model = Model(opts, self.streamer.train_user_dat, self.streamer.train_item_dat, self.streamer.user2vec)
     self.epoch = 0
     self.best_score = 1e10
Example #10
 def create(cls, args, train='train.pt', validation='validation.pt'):
     enl, dnl = AutoEncoder.get_non_linearity(args.nonlinearity)
     return Trainer(AutoEncoder(encoder_sizes=args.encoder,
                                encoding_dimension=args.dimension,
                                encoder_non_linearity=enl,
                                decoder_non_linearity=dnl,
                                decoder_sizes=args.decoder),
                    DataLoader(load(join(args.data, train)),
                               batch_size=args.batch,
                               shuffle=True,
                               num_workers=cpu_count()),
                    DataLoader(load(join(args.data, validation)),
                               batch_size=32,
                               shuffle=False,
                               num_workers=cpu_count()),
                    lr=args.lr,
                    weight_decay=args.weight_decay,
                    path=args.data)
Example #11
def create_model(loaded):
    '''
    Create Autoencoder from data that has previously been saved

    Parameters:
        loaded   A model that has been loaded from a file

    Returns:
        newly created Autoencoder
    '''
    old_args = loaded['args_dict']
    enl, dnl = AutoEncoder.get_non_linearity(old_args['nonlinearity'])
    product = AutoEncoder(encoder_sizes=old_args['encoder'],
                          encoding_dimension=old_args['dimension'],
                          encoder_non_linearity=enl,
                          decoder_non_linearity=dnl,
                          decoder_sizes=old_args['decoder'])
    product.load_state_dict(loaded['model_state_dict'])
    return product
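A hypothetical usage of create_model, assuming the checkpoint was written with torch.save and contains both 'args_dict' and 'model_state_dict' (the file name below is illustrative):

import torch

loaded = torch.load('autoencoder_checkpoint.pt', map_location='cpu')  # illustrative path
model = create_model(loaded)
model.eval()  # ready for inference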
Example #12
    def __init__(self, raw_text_path, intermediate_data_path):
        self.raw_text_path = raw_text_path
        self.intermediate_data_path = intermediate_data_path
        #seedFile="seed_file.txt"
        #rawTextFile="train_raw.txt"
        #os.chdir("D:/CS512/src")
        #print "cleaning entities"
        #CE=cleanEntity(self.seed_file)
        #clean_df=CE.pos_tagging()

        print "Generating word embeddings"
        WE = wordEmbeddings(rawTextFile, rawTextPath)
        w2v = WE.w2v_model(size=300)
        #w2v_vocab=w2v.wv.vocab
        WE.get_word_embeddings_df(seedFile, seedPath)

        print " AutoEncoder in progress"

        AE = AutoEncoder("WE.csv", dataPath, size=300)
        AE.detect_anomaly()
Example #13
def test_ae(x_train_, y_train_, x_test_, y_test_):
    """
    AE测试
    """
    rst = []
    # flatten the data
    x_train_ = x_train_.reshape(x_train_.shape[0], -1)
    x_test_ = x_test_.reshape(x_test_.shape[0], -1)
    # half of the test data is used to train the SVC, the other half for testing
    x_test_train = x_test_[:5000]
    y_test_train = y_test_[:5000]
    x_test_test = x_test_[5000:]
    y_test_test = y_test_[5000:]
    # each k is a different target dimensionality for the encoding
    for k in range(10, 200, 10):
        # train the autoencoder
        ae = AutoEncoder(28 * 28, k, 28 * 28)
        ae.fit(x_train_, x_train_)
        # train the SVC classifier
        svc = SVC(gamma='scale')
        svc.fit(ae.encode(x_test_train), y_test_train)
        # evaluate the classifier
        y_pred = svc.predict(ae.encode(x_test_test))
        accuracy = accuracy_score(y_test_test, y_pred)
        print(accuracy)
        rst.append(accuracy)
    return rst
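A short usage sketch for test_ae, assuming matplotlib is available; the data arrays below are placeholders for an MNIST-style dataset:

import matplotlib.pyplot as plt

accuracies = test_ae(x_train, y_train, x_test, y_test)  # placeholder arrays
ks = list(range(10, 200, 10))  # matches the sweep inside test_ae
plt.plot(ks, accuracies, marker='o')
plt.xlabel('encoding dimension k')
plt.ylabel('SVC accuracy')
plt.show()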
Example #14
def test_encoder():
    from AutoEncoder import AutoEncoder
    import matplotlib.pyplot as plt

    device = torch.device("cuda")

    enc = AutoEncoder(3).to(device)
    enc.load("ckpts/encoder_test.pth")

    loader = iter(get_cifar10())

    images, _ = next(loader)

    with torch.no_grad():
        _, decoded = enc(images.to(device))
        decoded = decoded.cpu().permute(0, 2, 3, 1).numpy()

        for i in range(10):
            plt.imshow(images[i].permute(1, 2, 0).numpy())
            plt.show()

            plt.imshow(decoded[i])
            plt.show()
Example #15
    def grow(self, bmu):
        # type: (object) -> object
        p = self.grid[bmu]
        up = p + np.array([0, +1])
        right = p + np.array([+1, 0])
        down = p + np.array([0, -1])
        left = p + np.array([-1, 0])

        neighbors = np.array([up, right, down, left])
        for nei in neighbors:
            try:
                self.errors[str(list(nei))] += self.errors[bmu] * self.fd
            except KeyError:
                w1, w2, b1, b2 = self.get_new_weight(bmu, nei)
                AE = AutoEncoder(self.dims, self.hid, self.s1, self.m1, gaussian=self.gaussian)

                AE.set_params(w1, w2, b1, b2)

                self.learners[str(list(nei))] = AE
                self.grid[str(list(nei))] = list(nei)
                self.errors[str(list(nei))] = self.GT/2
                self.gen[str(list(nei))] = self.current_gen
                self.hits[str(list(nei))] = 0
        self.errors[bmu] = self.GT / 2
Example #16
    def __init__(self,
                 coding_dims=[128, 64, 8],
                 dropout=None,
                 dropout_inputs=0,
                 noise=0,
                 loss=keras.objectives.mean_squared_error,
                 batch_size=50,
                 name_tag=None):

        self.loss_mode = 0
        self.loss_weight = 1
        self.distances_raw = None
        self.distances_mean = None
        input_dim = 3 + (len(meta.aalto_hand_data.columns_features)) * 4

        AutoEncoder.__init__(self,
                             input_dim,
                             coding_dims=coding_dims,
                             dropout=dropout,
                             dropout_inputs=dropout_inputs,
                             noise=noise,
                             loss=loss,
                             batch_size=batch_size,
                             name_tag=name_tag)
Example #17
def main(_):
    check_dir()
    print_config()
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    run_option = tf.ConfigProto(gpu_options=gpu_options)
    with tf.Session(config=run_option) as sess:
        ae = AutoEncoder(config=FLAGS, sess=sess)
        ae.build_model()
        if FLAGS.is_training:
            ae.train_model()
        if FLAGS.is_testing:
            pass
Example #18
    def __init__(self, np_rng, theano_rng=None, n_ins=784, hidden_layer_sizes=[500, 500], n_outs=10):
        
        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layer_sizes)
        
        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(np_rng.randint(2 ** 30))
     
        self.x = T.matrix('x') 
        self.y = T.ivector('y') 
        
        for i in xrange(self.n_layers):
            if i == 0:
                n_in = n_ins
                layer_input = self.x
            else:
                n_in = hidden_layer_sizes[i-1]
                layer_input = self.sigmoid_layers[-1].output

            n_out = hidden_layer_sizes[i]            
            
            sigmoid_layer = HiddenLayer(np_rng, layer_input, n_in, n_out, activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)
            
            self.params.extend(sigmoid_layer.params)
            
            dA_layer = AutoEncoder(np_rng, n_in, n_out, theano_rng=theano_rng, input=layer_input, 
                                   W=sigmoid_layer.W, b_hid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)
            
        self.log_layer = LogisticRegression(self.sigmoid_layers[-1].output, self.y, hidden_layer_sizes[-1], n_outs)
        self.params.extend(self.log_layer.params)

        self.finetune_cost = self.log_layer.negative_log_likelihood()
        self.errors = self.log_layer.errors()        
Example #19
def train(filename, datasetX, datasetY, encoderPath, modelPath,
          predictionPath):
    # Split into training and testing set.
    index = int(len(datasetX) * TRAINING_DATA_PERCENTAGE)
    trainingX = np.array(datasetX[:index])
    trainingY = np.array(datasetY[:index])
    testingX = np.array(datasetX[index:])
    testingY = np.array(datasetY[index:])

    # Remove timestamps from training.
    trainingY = trainingY.transpose()[1].transpose()
    # Extract timestamps from testing.
    testTargetDates = testingY.transpose()[0].transpose()
    testingY = testingY.transpose()[1].transpose()

    numberOfInputParameters = len(datasetX[0][0])
    inputShape = (LOOKBACK, numberOfInputParameters)

    if USE_AUTOENCODER:
        if os.path.isfile(encoderPath):
            encoder = load_model(encoderPath)
        else:
            aec = AutoEncoder(inputShape)
            encoder = aec.fit(trainingX, testingX)
            encoder.save(encoderPath)

        for layer in encoder.layers:
            layer.trainable = False

    if os.path.isfile(modelPath):
        model = load_model(modelPath)
    else:
        outputShape = (1)
        if USE_AUTOENCODER:
            model = createModelWithEncoder(encoder, outputShape)
        else:
            model = createConvModel(inputShape, outputShape)

        # Fit the model.
        model.fit(trainingX,
                  trainingY,
                  epochs=150,
                  batch_size=256,
                  validation_data=[testingX, testingY],
                  callbacks=[TensorBoard(log_dir='/tmp/')])
        model.save(modelPath)

    # Evaluate the model.
    last = len(trainingX[0][0]) - 1
    mean = np.load(preprocessPath + filename + "_means.npy")[last]
    stddev = np.load(preprocessPath + filename + "_stddev.npy")[last]

    prediction = model.predict(testingX)

    with open(predictionPath, 'w') as csvfile:
        trainScores = model.evaluate(trainingX, trainingY)
        testScores = model.evaluate(testingX, testingY)
        predictionWriter = csv.writer(csvfile,
                                      delimiter=';',
                                      quotechar='|',
                                      quoting=csv.QUOTE_MINIMAL)
        predictionWriter.writerow([
            'Test Loss',
            str(testScores), '', 'Training Loss',
            str(trainScores)
        ])
        predictionWriter.writerow(
            ['Date', 'Prediction', 'Target', 'Prediction/Target'])
        lines = []
        for prediction, target, date in zip(prediction, testingY,
                                            testTargetDates):
            prediction = int(prediction[0] * stddev + mean)
            target = int(target * stddev + mean)
            fraction = prediction / target
            lines += [[
                str(date.date()),
                str(prediction),
                str(target),
                str(fraction)
            ]]

        lines = sorted(lines)

        for line in lines:
            predictionWriter.writerow(line)

        print(testScores)
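The helpers createModelWithEncoder and createConvModel are not shown above. A minimal sketch of the former (an assumption, not the original implementation), using the Keras Sequential API: the frozen encoder's output is flattened and fed to a single regression unit:

from keras.models import Sequential
from keras.layers import Flatten, Dense

def createModelWithEncoder(encoder, outputShape):
    # the encoder layers were frozen above (layer.trainable = False)
    model = Sequential()
    model.add(encoder)
    model.add(Flatten())
    model.add(Dense(outputShape))  # outputShape is 1 here: a single regression target
    model.compile(optimizer='adam', loss='mse')
    return model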
Example #20
    for axis in axes.flat:

        """ Add row of weights as an image to the plot """

        image = axis.imshow(opt_W1[index, :].reshape(vis_patch_side, vis_patch_side),
                            cmap = plt.cm.gray, interpolation = 'nearest')
        axis.set_frame_on(False)
        axis.set_axis_off()
        index += 1

    """ Show the obtained plot """

    plt.show()


fdat = pd.read_csv('/home/senanayaked/data/mnist_train.csv', header=None)
x = np.array(fdat)[:10000, 1:]
X = np.array(x).astype(float)/255.0

st = time.time()

ae = AutoEncoder(vis=784, hid=100, gaussian=True)

Y = ae.train_batch(X, 400, 0.00075, batch_size=100)
ela = time.time()-st
print ela /1000
plt.imshow(np.reshape(Y[0]*255, (28,28)))
plt.show()

visualizeW1(ae.w1.T, 28, 10)
Example #21
def test_pickled_dA(learning_rate=0.1,
            dataset='../data/mnist.pkl.gz',
            pickle_file='/scratch/z/zhaolei/lzamparo/gpu_tests/dA_results/dA_pickle.save',
            corruption=0.1,
            training_epochs=3,
            batch_size=20):
    """
        Test pickling, unpickling code for the dA class.  Start up a model, train, pickle, unpickle, and continue to train  
        
    """
    
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    
    ####################################
    # Build the model #
    ####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = AutoEncoder(numpy_rng=rng, theano_rng=theano_rng, input=x,
            n_visible=28 * 28, n_hidden=500, loss='xent')

    cost, updates = da.get_cost_updates(corruption_level=0., learning_rate=learning_rate)

    train_da = theano.function([index], cost, updates=updates,
         givens={x: train_set_x[index * batch_size:
                                (index + 1) * batch_size]})

    ############
    # Train the model for 3 epochs #
    ############

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))
    
    ############
    # Pickle the model #
    ############
    
    f = file(pickle_file, 'wb')
    cPickle.dump(da, f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()
    
    ############
    # Unpickle the model, try to recover #
    ############
    
    f = file(pickle_file, 'rb')
    pickled_dA = cPickle.load(f)
    f.close()

    ############
    # Compare the two models #
    ###########
    dA_params = da.get_params()
    pickled_params = pickled_dA.get_params()
    
    if not numpy.allclose(dA_params[0].get_value(), pickled_params[0].get_value()):
        print "numpy says that Ws are not close"
    if not numpy.allclose(dA_params[1].get_value(), pickled_params[1].get_value()):
        print "numpy says that the bvis are not close"
    if not numpy.allclose(dA_params[2].get_value(), pickled_params[2].get_value()):
        print "numpy says that the bhid are not close"
    

    ############
    # Compare the two models #
    ##########    
    pickled_dA.set_input(x)
    cost, updates = pickled_dA.get_cost_updates(corruption_level=0.1, learning_rate=learning_rate)
    
    pickle_train_da = theano.function([index], cost, updates=updates,
         givens={x: train_set_x[index * batch_size:
                                (index + 1) * batch_size]})
    
    ############
    # Train the model for 3 epochs #
    ############

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(pickle_train_da(batch_index))
    
    print "Passed create, pickle, unpickle, train test"
Example #22
width = 128
strokes = 1000

# Transforms for our input images
img_transform = transforms.Compose([
    transforms.Resize(size=(width, width)),
    transforms.ToTensor()
])

# Loading Dataset of celebrity face images
#dataset = ImageFolder('C:\\Users\\Shawn\\Desktop\\NYU\\LTP_SVG\\one_image', transform=img_transform) # laptop
dataset = ImageFolder('/home/so1463/LearningToPaint/baseline/one_image', transform=img_transform) # cluster
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# AutoEncoder model
encoder = AutoEncoder().to(device)
#if os.path.exists('./AutoEncoder.pth'):
#    model.load_state_dict(torch.load('/home/so1463/LearningToPaint/baseline/AutoEncoder.pth'))

# Define our RNN model
RNN = RNN().to(device)


# Freeze weights of the renderer
renderer = FCN().to(device)
renderer.load_state_dict(torch.load(args.renderer))
renderer = renderer.to(device).eval()
for p in renderer.parameters():
    p.requires_grad = False

# Define optimizer and loss function
Example #23
def start_transform(ae, path_src, path_tar, dim):
    # read data
    df = pd.read_csv(path_src, encoding='utf-8')
    print("data read")

    # extract als features
    cols_als = [col for col in df.columns if col.startswith("als_")]
    df_transform = df[cols_als]

    # start transforming
    df_result = ae.transform(df_transform, dim)

    # drop the original columns and save the result
    col_rm = df_transform.columns
    df_result.drop(col_rm, 1, inplace=True)
    df_result.to_csv(path_tar, encoding='utf-8')
    print(u"result written\n")


if __name__ == "__main__":
    train_path = './train.csv'
    test_path = './test.csv'
    result_path = './result.csv'
    encoding_dim = 30  # dimensionality of the compressed features

    AE = AutoEncoder()

    start_train(AE, train_path, encoding_dim)
    start_transform(AE, train_path, result_path, encoding_dim)
Example #24
def test_pickled_sqe_dA(learning_rate=0.001,            
            pickle_file='/scratch/z/zhaolei/lzamparo/gpu_tests/dA_results/dA_sqe_pickle.save',
            corruption=0.1,
            training_epochs=3,
            batch_size=20):
    """ Test creating a dA model from scratch, training for a set number of epochs, pickle the model, unpickle, continue. """   

    current_dir = os.getcwd()    

    os.chdir(options.dir)
    today = datetime.today()
    day = str(today.date())
    hour = str(today.time())
    output_filename = "test_dA_squarederror_pickle." + day + "." + hour
    output_file = open(output_filename,'w')
    
    print >> output_file, "Run on " + str(datetime.now())    
    
    os.chdir(current_dir)
    
    data_set_file = openFile(str(options.inputfile), mode = 'r')
    datafiles, labels = extract_labeled_chunkrange(data_set_file, num_files = 10)
    datasets = load_data_labeled(datafiles, labels)
    train_set_x, train_set_y = datasets[0]
    data_set_file.close()

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_cols = train_set_x.get_value(borrow=True).shape[1]	

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')  # the data matrix
    
    ####################################
    # BUILDING THE MODEL #
    ####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = AutoEncoder(numpy_rng=rng, theano_rng=theano_rng, input=x,
            n_visible=n_cols, n_hidden=1000, loss='squared')

    cost, updates = da.get_cost_updates(corruption_level=float(options.corruption),
                                        learning_rate=learning_rate)

    train_da = theano.function([index], cost, updates=updates,
         givens={x: train_set_x[index * batch_size:
                                (index + 1) * batch_size]})

    start_time = time.clock()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))

        print >> output_file, 'Training epoch %d, cost ' % epoch, numpy.mean(c)

    end_time = time.clock()

    training_time = (end_time - start_time)

    print >> output_file, ('The 0 corruption code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((training_time) / 60.))  
    
    ############
    # Pickle #
    ############
    
    f = file(pickle_file, 'wb')
    cPickle.dump(da, f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()
        
    ############
    # Unpickle the model, try to recover #
    ############
    
    f = file(pickle_file, 'rb')
    pickled_dA = cPickle.load(f)
    f.close()    
    
    x = T.matrix('x')
    pickled_dA.set_input(x)   
    
    ############
    # Resume training #
    ############        
    
    cost, updates = pickled_dA.get_cost_updates(corruption_level=float(options.corruption),
                                            learning_rate=learning_rate)
    
    train_da = theano.function([index], cost, updates=updates,
         givens={x: train_set_x[index * batch_size:
                                (index + 1) * batch_size]})

    start_time = time.clock()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))

        print >> output_file, 'Training epoch %d, cost ' % epoch, numpy.mean(c)

    end_time = time.clock()

    training_time = (end_time - start_time)

    print >> output_file, ('The 0 corruption code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((training_time) / 60.))
    
    output_file.close()
Example #25
def train_autoencoder():

    ## parses the provided parameters according to the command line input
    parser = argparse.ArgumentParser(prog='AutoEncoder', conflict_handler='resolve',description = '''\
        This script should enable the user to train his AutoEncoder according to the input parameters
        ''')
    parser.add_argument('-l', '--learningrate', type=float, default=0.025, required=False, help='The Learning Rate')
    parser.add_argument('-b', '--batchsize', type=int, default=20, required=False, help='Batch Size For Training')
    parser.add_argument('-h', '--reducedUnits', type=int, default=30,  required=False, help='Number of Reduced Layer Units')
    parser.add_argument('-o', '--output', type=str, default="out", required=False, help='Path To The Output Folder')
    parser.add_argument('-1', '--l1reg', type=float, default=0.1, required=False, help='Value For L1 Regularisaion')
    parser.add_argument('-k', '--kul_leib_penalty', type=float, default=0.04, required=False, help='Value For Kullback Leiber Divergence Penalty')
    parser.add_argument('-k', '--kul_leib_beta', type=float, default=1.0, required=False, help='Controls The Weight Of The Sparsity Penalty Term')
    parser.add_argument('-s', '--sparsity', type=str, default='l1reg', choices=['l1reg', 'kul_leib'], required=False, help='Choose Which Penalty Should Be Used')
    parser.add_argument('-e', '--epochs', type=int, default=500, required=False, help='Number Of Epochs')
    parser.add_argument('-m', '--momentum', type=float, default=0.9, required=False, help='The Momentum Rate')


    requiredNamed = parser.add_argument_group('Required Arguments')
    requiredNamed.add_argument('-d', '--dataset', type=str, required=True, help='Path To The Training Set (MNIST)')
   
    parsed = parser.parse_args()

    if parsed.sparsity == 'kul_leib':
        assert parsed.kul_leib_penalty < 0.05
        outpath_raw = parsed.output + "/kul_leib"
    else:
        outpath_raw = parsed.output + "/l1reg"

    if not os.path.exists(outpath_raw):
        os.makedirs(outpath_raw)

    (train_images, train_labels), (validation_images, validation_labels), \
         (test_images, test_labels) = LoadData.loadMNIST(parsed.dataset)#, shuffle=True)

    number_train_images_batches = train_images.get_value(borrow=True).shape[0] // parsed.batchsize
    number_test_images_batches = test_images.get_value(borrow=True).shape[0] // parsed.batchsize
    number_validation_images_batches = validation_images.get_value(borrow=True).shape[0] // parsed.batchsize

    index = T.lscalar() 
    imageData = T.matrix('imageData')

    rng = np.random.RandomState(1234)##numpy random range generator

    autoencoder = AutoEncoder(
        input=imageData,
        rng=rng,
        n_input=28*28, ##image 28x28
        n_reduced=parsed.reducedUnits,
        sparsity_param=parsed.kul_leib_penalty,
        beta=parsed.kul_leib_beta,
        n_reconstructed=28*28
    )

    if parsed.sparsity == 'l1reg':
        cost_sparse = (
            autoencoder.cost
            + parsed.l1reg * abs(autoencoder.reducedLayer.weights).sum()
        )
    else:
        cost_sparse = (
            autoencoder.cost + autoencoder.kul_leib
        )



    updates = (
        gradient_updates_momentum(cost_sparse, autoencoder.params, parsed.learningrate, parsed.momentum)
    )


    trainBatchGivenIndex = theano.function(
        inputs=[index],
        outputs= cost_sparse,
        updates= updates,
        givens={
            imageData: train_images[index * parsed.batchsize: (index + 1) * parsed.batchsize]
        }
    )

    validateBatchGivenIndex = theano.function(
        inputs=[index],
        outputs= cost_sparse,
        givens={
            imageData: validation_images[index * parsed.batchsize: (index + 1) * parsed.batchsize]
        }
    )

    patience = 5000
    patience_increase = 2
    improvement_threshold = 0.995 
    best_validation_loss = np.inf
    best_validation_epoch = 0

    val_freq = min(number_train_images_batches, patience // 2)
    epoch = 0

    # improvement_threshold = 0.995 
    # lowest_cost = np.inf
    # best_minibatch = -1
    # best_epoch = -1
    encoder_name = None
    if parsed.sparsity == 'l1reg':
        encoder_name = 'encoder_' + str(parsed.l1reg) + '_l1'
    else:
        encoder_name = 'encoder_' + str(parsed.kul_leib_beta) + '_kul_leib'
    
    done_looping = False
    while (epoch < parsed.epochs) and not (done_looping):
        epoch = epoch + 1
        for minibatch_index in range(number_train_images_batches):
            minibatch_squared_error_loss = trainBatchGivenIndex(minibatch_index)
            idx = (epoch - 1) * number_train_images_batches + minibatch_index

            if (idx + 1) % val_freq == 0:
                validation_losses = [validateBatchGivenIndex(currentValidationBatch)
                                     for currentValidationBatch in range(number_validation_images_batches)]
                this_validation_loss = np.mean(validation_losses)
                print("Epoch %d, Batch Index: %d / %d, Accuracy On Validation Samples: %f" \
                    % (epoch, minibatch_index,  number_train_images_batches, this_validation_loss))       
                
                if this_validation_loss < best_validation_loss:
                    if this_validation_loss < best_validation_loss *  \
                           improvement_threshold:
                        patience = max(patience, idx * patience_increase)
                        best_validation_epoch = epoch

                    autoencoder.save(outpath_raw, encoder_name)
                    lowest_cost = this_validation_loss
                    best_validation_loss = this_validation_loss
                    best_epoch = epoch
                    best_minibatch = minibatch_index
            if patience <= idx:
                done_looping = True
                break

    print('Saved Model With Respect To Epoch %d , Minibatch %d And Cost Of %f' % \
           (best_epoch, best_minibatch, lowest_cost))

    reconstruct_images = theano.function(
        inputs=[],
        outputs=autoencoder.reconstruction,
        givens={
            imageData: test_images[:100]
        }
    )

    reconstructed_images = reconstruct_images()
    reconstructed_images.reshape(100,28,28)# * 255


    outpath = None
    if parsed.sparsity == 'l1reg':
        outpath = outpath_raw + '/reconstruct_' + str(parsed.l1reg) + '_l1.png'
    else:
        outpath = outpath_raw + '/reconstruct_' + str(parsed.kul_leib_beta) + '_kul_leib.png'

    arraysToImgs(rows=10,colums=10,arr=reconstructed_images,path=outpath,out_shape=(28,28))
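gradient_updates_momentum, LoadData, and arraysToImgs are external helpers not included above. A minimal sketch of gradient_updates_momentum, assuming the standard Theano momentum formulation (not necessarily the original implementation):

import theano
import theano.tensor as T

def gradient_updates_momentum(cost, params, learning_rate, momentum):
    updates = []
    for param in params:
        # shared variable holding the momentum-smoothed gradient for this parameter
        param_update = theano.shared(param.get_value() * 0.,
                                     broadcastable=param.broadcastable)
        updates.append((param, param - learning_rate * param_update))
        updates.append((param_update,
                        momentum * param_update + (1. - momentum) * T.grad(cost, param)))
    return updates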
Example #26
def AutoEncoder_demo(learning_rate=0.1, training_epochs=2, dataset='mnist.pkl.gz', batch_size=20, output_folder='dA_plots'):
   
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')  
    
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)
 
   
    #####################################
    # BUILDING THE MODEL CORRUPTION 0% #
    #####################################    
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = AutoEncoder(np_rng=rng, theano_rng=theano_rng, input=x, n_vis=28 * 28, n_hid=500)
    cost, updates = da.get_cost_updates(corruption_level=0., learning_rate=learning_rate)
    train_da = theano.function(inputs=[index], 
                               outputs=[cost], 
                               updates=updates,
                               givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]})

    start_time = time.clock()
    for epoch in xrange(training_epochs):
        # go through training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))

        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
    end_time = time.clock()

    training_time = (end_time - start_time)
    print >> sys.stderr, ('The no corruption code ran for %.2fm' % ((training_time) / 60.))
    image = PIL.Image.fromarray(tile_raster_images(X=da.W.get_value(borrow=True).T,
                                                   img_shape=(28, 28), tile_shape=(10, 10),
                                                   tile_spacing=(1, 1)))
    image.save('filters_corruption_0.jpg')


    #####################################
    # BUILDING THE MODEL CORRUPTION 30% #
    #####################################
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = AutoEncoder(np_rng=rng, theano_rng=theano_rng, input=x, n_vis=28 * 28, n_hid=500)
    cost, updates = da.get_cost_updates(corruption_level=0.3, learning_rate=learning_rate)
    train_da = theano.function(inputs=[index], 
                               outputs=[cost], 
                               updates=updates,
                               givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    start_time = time.clock()   
    for epoch in xrange(training_epochs):
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))

        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
    end_time = time.clock()
    
    training_time = (end_time - start_time)
    print >> sys.stderr, ('The 30 percent corruption code ran for %.2fm' % ((training_time) / 60.))
    image = PIL.Image.fromarray(tile_raster_images(X=da.W.get_value(borrow=True).T,
                                                   img_shape=(28, 28), tile_shape=(10, 10),
                                                   tile_spacing=(1, 1)))
    image.save('filters_corruption_30.jpg')

    os.chdir('../')
Example #27
    print(cnn_accuricy)
    return cnn_accuricy

sess = tf.InteractiveSession()

training_data, training_label = read()
test_data, test_label = read(dataset="testing")

training_label_split = patitioning(training_label)
test_label_split = patitioning(test_label)

with tf.device('/cpu:0'):
 training_label_onehot, test_label_onehot = one_hot_labeles(training_label_split, max(training_label_split + 1),
                                                           test_label_split, max(test_label_split + 1))

autoencoder = AutoEncoder(numOfOutput=7, epochs=15)
decoder_op, layer_1 = autoencoder.initial_autoencode_network('encoder_h1', 'encoder_b1', 'decoder_h1', 'decoder_b1',autoencoder._X)
autoencoder.calculate_AutoEncoder(decoder_op, training_data, training_label, autoencoder._X)
decoder_op, layer_2 = autoencoder.initial_autoencode_network('encoder_h2', 'encoder_b2', 'decoder_h2', 'decoder_b2',layer_1)
autoencoder.calculate_AutoEncoder(decoder_op, training_data, training_label, layer_1)
decoder_op, layer_3 = autoencoder.initial_autoencode_network('encoder_h3', 'encoder_b3', 'decoder_h3', 'decoder_b3',layer_2)
autoencoder.calculate_AutoEncoder(decoder_op, training_data, training_label, layer_2)

y_ = autoencoder.initial_mlp_network()
autoencoder_accuricy, autoencoder_predict_label  = autoencoder.calculate_session(y_, training_data, training_label_onehot, test_data, test_label_onehot)
print(autoencoder_accuricy)
# split data and result for next layouts
Datasplit = splitData(training_data, training_label_split)
Datatest = splitData(test_data, autoencoder_predict_label)
DatasplitLabel = splitLabel(training_label, training_label_split)
DatatestLabel = splitLabel(test_label, autoencoder_predict_label)
Example #28
from AutoEncoder import AutoEncoder
import numpy as np

x = [
    [[-1], [1], [1], [1]],
    [[1], [1], [1], [1]],
]
x = np.asarray(x)

# Build the auto-encoder
auto_encoder = AutoEncoder([3, 2], eta=0.05)
auto_encoder.assignX(x)
weights = auto_encoder.fit()

# Print the parameters
for i in range(len(weights)):
    print weights[i]
Example #29
import random

import numpy
from sklearn.preprocessing import minmax_scale

from AutoEncoder import AutoEncoder

data_set = numpy.loadtxt('../dataSets/hapt511', delimiter=',')

s_data = numpy.array(random.sample(data_set.tolist(), 256))
s_data = minmax_scale(s_data)
labels = s_data[:, -1]

_, cols = s_data.shape

s_index = random.sample(range(cols), 3)
s_data = s_data[:, s_index]

rows, cols = s_data.shape

ae = AutoEncoder()
ae.init_tf(fin=cols, fou=1, epochs=10)

results = ae.get_results(s_data)

split = random.uniform(min(results['inters']), max(results['inters']))

# print(results['inters'][numpy.where(results['inters'] > split)])

# print(results['weights'])
# print(results['biases'])
# print(results['inters'])
exit()

# split = (min(results['inters'])+max(results['inters'])) /2
Example #30
def AutoEncoder_demo(learning_rate=0.1,
                     training_epochs=2,
                     dataset='mnist.pkl.gz',
                     batch_size=20,
                     output_folder='dA_plots'):

    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    #####################################
    # BUILDING THE MODEL CORRUPTION 0% #
    #####################################
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2**30))

    da = AutoEncoder(np_rng=rng,
                     theano_rng=theano_rng,
                     input=x,
                     n_vis=28 * 28,
                     n_hid=500)
    cost, updates = da.get_cost_updates(corruption_level=0.,
                                        learning_rate=learning_rate)
    train_da = theano.function(
        inputs=[index],
        outputs=[cost],
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    start_time = time.clock()
    for epoch in xrange(training_epochs):
        # go through training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))

        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
    end_time = time.clock()

    training_time = (end_time - start_time)
    print >> sys.stderr, ('The no corruption code ran for %.2fm' %
                          ((training_time) / 60.))
    image = PIL.Image.fromarray(
        tile_raster_images(X=da.W.get_value(borrow=True).T,
                           img_shape=(28, 28),
                           tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('filters_corruption_0.jpg')

    #####################################
    # BUILDING THE MODEL CORRUPTION 30% #
    #####################################
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2**30))

    da = AutoEncoder(np_rng=rng,
                     theano_rng=theano_rng,
                     input=x,
                     n_vis=28 * 28,
                     n_hid=500)
    cost, updates = da.get_cost_updates(corruption_level=0.3,
                                        learning_rate=learning_rate)
    train_da = theano.function(
        inputs=[index],
        outputs=[cost],
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    start_time = time.clock()
    for epoch in xrange(training_epochs):
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))

        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
    end_time = time.clock()

    training_time = (end_time - start_time)
    print >> sys.stderr, ('The 30 percent corruption code ran for %.2fm' %
                          ((training_time) / 60.))
    image = PIL.Image.fromarray(
        tile_raster_images(X=da.W.get_value(borrow=True).T,
                           img_shape=(28, 28),
                           tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('filters_corruption_30.jpg')

    os.chdir('../')
Example #31
def test_AutoEncoder(learning_rate=0.1, training_epochs=15,
            batch_size=20):

    """

    :type learning_rate: float
    :param learning_rate: learning rate used for training the Denoising
                          AutoEncoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

  
    """


    x_scipySparse = None; train_set_x = None; numInstances = 0; numFeatures = 0;
    if((os.path.exists("input_scipySparse.obj"))):
        print "loading sparse data from pickled file..."
        f = open("input_scipySparse.obj", 'r')
        x_scipySparse = cPickle.load(f)
        f.close()
        numInstances, numFeatures = x_scipySparse.shape
        
    else: 
        print "extracting features and building sparse data..."
        fe = FeatureExtractor()  
        fe.extractFeatures()
        train_set_x = fe.instanceList
        featureDict = fe.featDict   
        numInstances = len(train_set_x)
        numFeatures = len(featureDict)        
        x_lil = sp.lil_matrix((numInstances,numFeatures), dtype='float32') # the data is presented as a sparse matrix 
        i = -1; v = -1;
        try:
            for i,instance in enumerate(train_set_x):
                for v in instance.input:
                    x_lil[i, v] = 1
        except:
            print "i=",i," v=",v
        x_scipySparse = x_lil.tocsc()
        f = open("input_scipySparse.obj", 'w')
        cPickle.dump(x_scipySparse, f, protocol=cPickle.HIGHEST_PROTOCOL)
        f.close()

    

    # compute number of mini-batches for training, validation and testing
    n_train_batches = numInstances / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    #x = sparse.basic.as_sparse_variable(x_scipySparse, 'x')
    x = theano.shared(x_scipySparse, borrow=True)

    
    ####################################
    # BUILDING THE MODEL               #
    ####################################

    print "building the model..."
    rng = numpy.random.RandomState(123)

    ae = AutoEncoder(numpy_rng=rng, input=x, n_visible=numFeatures, n_hidden=10, n_trainExs=numInstances)

    cost, updates = ae.get_cost_updates(corruption_level=0.,
                                        learning_rate=learning_rate)

    train_ae = theano.function([index], cost, updates=updates,
         givens={x: train_set_x[index * batch_size:
                                (index + 1) * batch_size]})

    start_time = time.clock()

    ############
    # TRAINING #
    ############

    # go through training epochs
    print "starting training..."
    for epoch in xrange(training_epochs):
        # go through training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_ae(batch_index))

        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)

    end_time = time.clock()

    training_time = (end_time - start_time)
    print "training completed in : ", training_time
Example #32
def createLayer(index, numOfOutput):
    training_data = np.array(Datasplit[index], dtype=np.float32)
    training_label = np.array(DatasplitLabel[index], dtype=np.float32)
    test_data = np.array(Datatest[index], dtype=np.float32)
    test_label = np.array(DatatestLabel[index], dtype=np.float32)

    autoencoder = AutoEncoder(numOfOutput=numOfOutput, epochs=10)
    decoder_op, layer_1 = autoencoder.initial_autoencode_network('encoder_h1', 'encoder_b1', 'decoder_h1', 'decoder_b1',
                                                                 autoencoder._X)
    autoencoder.calculate_AutoEncoder(decoder_op, training_data, training_label, autoencoder._X)
    decoder_op, layer_2 = autoencoder.initial_autoencode_network('encoder_h2', 'encoder_b2', 'decoder_h2', 'decoder_b2',
                                                                 layer_1)
    autoencoder.calculate_AutoEncoder(decoder_op, training_data, training_label, layer_1)
    decoder_op, layer_3 = autoencoder.initial_autoencode_network('encoder_h3', 'encoder_b3', 'decoder_h3', 'decoder_b3',
                                                                 layer_2)
    autoencoder.calculate_AutoEncoder(decoder_op, training_data, training_label, layer_2)
    y_ = autoencoder.initial_mlp_network()
    with tf.device('/cpu:0'):
      training_label_onehot = sess.run(
            tf.one_hot(indices=training_label, depth=numOfOutput, dtype=np.float64))
      test_label_onehot = sess.run(
            tf.one_hot(indices=test_label, depth=numOfOutput, dtype=np.float64))
    cnn_accuricy, cnn_predict_label = autoencoder.calculate_session(y_, training_data, training_label_onehot, test_data,
                                                             test_label_onehot)
    print(cnn_accuricy)
    return cnn_accuricy
Example #33
         transforms.ToTensor()])),
                    num_workers=2,
                    batch_size=1,
                    shuffle=False,
                    pin_memory=False)
val_LR = DataLoader(torchvision.datasets.ImageFolder(
    '/home/atharva/Datasets/DIV2K_VAL_LR',
    transform=transforms.Compose([transforms.ToTensor()])),
                    num_workers=2,
                    batch_size=1,
                    shuffle=False,
                    pin_memory=False)
'''
for (x,y), (x_d, y_d) in zip(data_HR, data_LR):
    print(x.shape)
    x = x.permute(2, 3, 1, 0)
    x = x.squeeze().detach().cpu().numpy()
    x_d = x_d.permute(2, 3, 1, 0)
    x_d = x_d.squeeze().detach().cpu().numpy()
    cv2.imshow('image', x)
    cv2.imshow('degraded', x_d)
    cv2.waitKey(delay=3000)
'''
model = AutoEncoder()
model.train_model(dataloader_HR=data_HR,
                  dataloader_LR=data_LR,
                  model=model,
                  epochs=75,
                  val_HR=val_HR,
                  val_LR=val_LR)
Example #34
import os

SR = 44100

if __name__ == '__main__':
    checkpoint_path = "models/cp.ckpt"
    checkpoint_dir = os.path.dirname(checkpoint_path)

    sample_shape = (16, 18)     # imaginary shape for single spectrogram
    from AutoEncoder import AutoEncoder
    ae = AutoEncoder()
    ae.compile(input_shape=sample_shape)

    gen_train = []
    gen_val = []
    _history = ae.fit_gen(gen_train, gen_val, checkpoint_dir, epochs=100, batch_size=batch_size)
Example #35
f = gzip.open(fname, 'rb')
train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
f.close()

print("Partitioning Data")
X, y = train_set
X = np.rint(X * 256).astype(np.int).reshape(
    (-1, 1, 28, 28))  # convert to (0,255) int range (we'll do our own scaling)
mu, sigma = np.mean(X.flatten()), np.std(X.flatten())
X_train = X.astype(np.float64)
X_train = (X_train - mu) / sigma
X_train = X_train.astype(np.float32)
X_out = X_train.reshape((X_train.shape[0], -1))

print("Begin Training")
epochs = 20
ae = AutoEncoder(
    update_learning_rate=0.01,
    update_momentum=0.975,
    batch_iterator_train=FlipBatchIterator(batch_size=128),
    regression=True,
    max_epochs=epochs,
    verbose=1,
)
ae.fit(X_train, X_out, 5)

print("Saving Parameters")
ae.save_params_to("./data/conv_ae.np")

print("Done")
Example #36
def get_random_images():
    index = np.random.randint(5000)

    original_image = Image.fromarray(get_picture_array(X, index))
    new_size = (original_image.size[0] * 2, original_image.size[1])
    new_im = Image.new('L', new_size)
    new_im.paste(original_image, (0, 0))
    rec_image = Image.fromarray(get_picture_array(X_pred, index))
    new_im.paste(rec_image, (original_image.size[0], 0))
    new_im.save('images/orig.png', format="PNG")


print("Loading Autoencoder")
ae = AutoEncoder(update_learning_rate=0.01,
                 update_momentum=0.975,
                 batch_iterator_train=FlipBatchIterator(batch_size=128),
                 regression=True,
                 max_epochs=20)
ae.load_params_from("./data/conv_ae.np")

print("Unpickling MNIST")
fname = './data/mnist.pkl.gz'
f = gzip.open(fname, 'rb')
train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
f.close()

print("Partitioning Data")
X, y = train_set
X = np.rint(X * 256).astype(np.int).reshape(
    (-1, 1, 28, 28))  # convert to (0,255) int range (we'll do our own scaling)
mu, sigma = np.mean(X.flatten()), np.std(X.flatten())
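get_picture_array is an external helper not shown in this snippet. A possible implementation (an assumption, not the original): it returns a uint8 28x28 array that Image.fromarray can paste onto the 'L'-mode canvas above; reconstructed X_pred values may need rescaling back to the 0-255 range first:

import numpy as np

def get_picture_array(X, index):
    img = np.asarray(X[index]).reshape(28, 28)
    return np.clip(img, 0, 255).astype(np.uint8)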
Example #37
def drive_dA(learning_rate=0.1, training_epochs=15,
            dataset='../data/mnist.pkl.gz',
            batch_size=20):
    """
        This demo is tested on MNIST
    
        :type learning_rate: float
        :param learning_rate: learning rate used for training the Denoising
                              AutoEncoder
    
        :type training_epochs: int
        :param training_epochs: number of epochs used for training
    
        :type dataset: string
        :param dataset: path to the picked dataset
    
    """
    parser = OptionParser()
    parser.add_option("-d", "--dir", dest="dir", help="test output directory")
    parser.add_option("-c", "--corruption", dest="corruption", help="use this amount of corruption for the denoising AE")
    
    (options, args) = parser.parse_args()    

    current_dir = os.getcwd()    

    os.chdir(options.dir)
    today = datetime.today()
    day = str(today.date())
    hour = str(today.time())
    output_filename = "denoising_autoencoder_mnist." + day + "." + hour
    output_file = open(output_filename,'w')
    
    print >> output_file, "Run on " + str(datetime.now())    
    
    os.chdir(current_dir)
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    
    ####################################
    # BUILDING THE MODEL NO CORRUPTION #
    ####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = AutoEncoder(numpy_rng=rng, theano_rng=theano_rng, input=x,
            n_visible=28 * 28, n_hidden=500)

    cost, updates = da.get_cost_updates(corruption_level=0.,
                                        learning_rate=learning_rate)

    train_da = theano.function([index], cost, updates=updates,
         givens={x: train_set_x[index * batch_size:
                                (index + 1) * batch_size]})

    start_time = time.clock()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))

        print >> output_file, 'Training epoch %d, cost ' % epoch, numpy.mean(c)

    end_time = time.clock()

    training_time = (end_time - start_time)

    print >> output_file, ('The 0-corruption run for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((training_time) / 60.))
    
            
    ##########
    # Build the model, with corruption 
    ##########
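    # With corruption_level > 0 the autoencoder is trained to reconstruct the
    # clean input from a randomly corrupted copy (roughly that fraction of each
    # input's units is zeroed on every presentation), which is what makes it a
    # denoising autoencoder.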
    
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = AutoEncoder(numpy_rng=rng, theano_rng=theano_rng, input=x,
            n_visible=28 * 28, n_hidden=500)

    cost, updates = da.get_cost_updates(corruption_level=float(options.corruption),
                                        learning_rate=learning_rate)

    train_da = theano.function([index], cost, updates=updates,
         givens={x: train_set_x[index * batch_size:
                                (index + 1) * batch_size]})

    start_time = time.clock()
    
    ##########
    # Train the model
    ##########
    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))

        print >> output_file, 'Training epoch %d, cost ' % epoch, numpy.mean(c)

    end_time = time.clock()

    training_time = (end_time - start_time)

    print >> output_file, ('The ' + str(options.corruption) + ' corruption run for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((training_time) / 60.))

    output_file.close()
Example #38
training_data, training_label = read()
test_data, test_label = read(dataset="testing")
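# NOTE: `read()` and `sess` (a tf.Session) are presumably defined earlier in the
# original script; this fragment only shows the layer-wise pretraining steps.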

with tf.device('/cpu:0'):
    # One hot encoding
    training_label = sess.run(
        tf.one_hot(indices=training_label,
                   depth=max(training_label + 1),
                   dtype=np.float64))
    test_label = sess.run(
        tf.one_hot(indices=test_label,
                   depth=max(test_label + 1),
                   dtype=np.float64))
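
# Greedy layer-wise pretraining: each autoencoder below is fit on the encoding
# produced by the previous layer, and the stacked encoders then feed the
# supervised MLP head built by initial_mlp_network().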

autoencoder = AutoEncoder()
decoder_op, layer_1 = autoencoder.initial_autoencode_network(
    'encoder_h1', 'encoder_b1', 'decoder_h1', 'decoder_b1', autoencoder._X)
autoencoder.calculate_AutoEncoder(decoder_op, training_data, training_label,
                                  autoencoder._X)
decoder_op, layer_2 = autoencoder.initial_autoencode_network(
    'encoder_h2', 'encoder_b2', 'decoder_h2', 'decoder_b2', layer_1)
autoencoder.calculate_AutoEncoder(decoder_op, training_data, training_label,
                                  layer_1)
decoder_op, layer_3 = autoencoder.initial_autoencode_network(
    'encoder_h3', 'encoder_b3', 'decoder_h3', 'decoder_b3', layer_2)
autoencoder.calculate_AutoEncoder(decoder_op, training_data, training_label,
                                  layer_2)

y_ = autoencoder.initial_mlp_network()
cnn_accuricy, cnn_predict_label = autoencoder.calculate_session(
Example #39
train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
f.close()

print("Partitioning Data")
X, y = train_set
X = np.rint(X * 256).astype(int).reshape((-1, 1, 28, 28))  # convert to (0,255) int range (we'll do our own scaling)
mu, sigma = np.mean(X.flatten()), np.std(X.flatten())
X_train = X.astype(np.float64)
X_train = (X_train - mu) / sigma
X_train = X_train.astype(np.float32)
X_out = X_train.reshape((X_train.shape[0], -1))


print("Begin Training")
epochs = 20
ae = AutoEncoder(
    update_learning_rate=0.01,
    update_momentum=0.975,
    batch_iterator_train=FlipBatchIterator(batch_size=128),
    regression=True,
    max_epochs=epochs,
    verbose=1,
)
ae.fit(X_train, X_out, 5)

print("Saving Parameters")
ae.save_params_to("./data/conv_ae.np")


print("Done")
Example #40
    index = np.random.randint(5000)

    original_image = Image.fromarray(get_picture_array(X, index))
    new_size = (original_image.size[0] * 2, original_image.size[1])
    new_im = Image.new('L', new_size)
    new_im.paste(original_image, (0, 0))
    rec_image = Image.fromarray(get_picture_array(X_pred, index))
    new_im.paste(rec_image, (original_image.size[0], 0))
    new_im.save('images/orig.png', format="PNG")


print("Loading Autoencoder")
ae = AutoEncoder(
    update_learning_rate=0.01,
    update_momentum=0.975,
    batch_iterator_train=FlipBatchIterator(batch_size=128),
    regression=True,
    max_epochs=20,
)
ae.load_params_from("./data/conv_ae.np")

print("Unpickling MNIST")
fname = './data/mnist.pkl.gz'
f = gzip.open(fname, 'rb')
train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
f.close()

print("Partitioning Data")
X, y = train_set
X = np.rint(X * 256).astype(int).reshape((-1, 1, 28, 28))  # convert to (0,255) int range (we'll do our own scaling)
mu, sigma = np.mean(X.flatten()), np.std(X.flatten())
Example #41
class DLForest:
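    """Isolation-forest-style anomaly detector: each tree node projects a random
    subset of attributes through a small AutoEncoder and splits on a random
    threshold of that projection, so anomalies end up on short paths."""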
    def __init__(self):
        self.sample_size = 0
        self.height_limit = 0
        self.leaf_size_limit = 10
        self.attr_sample_size = 1
        self.forest_size_limit = 10
        self.forest = list()
        self.ae = None

    def setup(self,
              sample_size=128,
              height_limit=6,
              leaf_size_limit=10,
              attr_sample_size=2,
              forest_size_limit=5):
        self.sample_size = sample_size
        self.height_limit = height_limit
        self.leaf_size_limit = leaf_size_limit
        self.attr_sample_size = attr_sample_size
        self.forest_size_limit = forest_size_limit

    def build(self, data):
        i = 0
        self.ae = AutoEncoder()
        self.ae.init_tf(fin=self.attr_sample_size,
                        fou=1,
                        epochs=10,
                        l_rate=0.01)
        while i < self.forest_size_limit:
            sample_data = numpy.array(random.sample(data, self.sample_size))
            tree = self.build_tree(sample_data, 0)
            self.forest.append(tree)
            i += 1

    def build_tree(self, data, curH):
        rows, cols = data.shape
        if rows <= self.leaf_size_limit or curH >= self.height_limit:
            node = Node()
            node.size = rows
            node.external = True
            return node
        else:
            sample_index = random.sample(range(cols), self.attr_sample_size)
            results = self.ae.get_results(data[:, sample_index])
            node = Node()
            node.index = sample_index
            node.size = rows
            node.weights = results['weights']
            node.bias = results['biases']
            node.split = random.uniform(min(results['inters']),
                                        max(results['inters']))
            left, right = self.split_data(results['inters'], node.split)
            node.left = self.build_tree(data[left, :], curH + 1)
            node.right = self.build_tree(data[right, :], curH + 1)
            return node

    def split_data(self, data, split_point):
        left, right = list(), list()
        for i in range(len(data)):
            if data[i] <= split_point:
                left.append(i)
            else:
                right.append(i)
        return left, right

    def evaluate(self, data, hlimit=6):
        scores = list()
        for item in data:
            score = 0
            for tree in self.forest:
                score += self.path(item, tree, curH=0, hlimit=hlimit)
            score /= self.forest_size_limit
            scores.append(score)
        return scores

    def path(self, instance, tree, curH, hlimit):
        if tree.external is True or curH >= hlimit:
            return curH + self.cost(tree.size)
        tag = sum(instance[tree.index] * tree.weights) + tree.bias
        if tag <= tree.split:
            return self.path(instance, tree.left, curH + 1, hlimit)
        else:
            return self.path(instance, tree.right, curH + 1, hlimit)

    def cost(self, size):
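        # Average path length of an unsuccessful search in a tree built over
        # `size` points, used (as in isolation forests) to credit subtrees that
        # were cut off by the height limit; this variant uses a base-2 log.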
        if size == 2:
            return 1
        elif size < 2:
            return 0
        else:
            return 2 * log(size - 1, 2) - float(2 * (size - 1)) / size

    def show(self, tree):
        if tree is None:
            print('is None')
            return None
        print(tree.size)
        self.show(tree.left)
        self.show(tree.right)
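# A minimal usage sketch (hypothetical data; assumes Node and AutoEncoder are
# importable and that train_rows / test_rows are 2-D numpy arrays):
#
#   forest = DLForest()
#   forest.setup(sample_size=128, height_limit=6, attr_sample_size=2,
#                forest_size_limit=5)
#   forest.build(list(train_rows))       # build() samples rows via random.sample
#   scores = forest.evaluate(test_rows)  # smaller average path length => more anomalous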