def test_dA(train_set_x, train_set_x_feature_num,learning_rate=0.1, training_epochs=8,
            batch_size=5, da_object = None):
    """Train a denoising autoencoder on ``train_set_x`` minibatch by minibatch.

    Args:
        train_set_x: Training data; converted with ``numpy.array`` and wrapped
            in a Theano shared variable. Rows are sliced into minibatches.
        train_set_x_feature_num: Number of visible units given to ``dA``; the
            hidden layer gets ``train_set_x_feature_num * 3 // 4`` units.
        learning_rate: Learning rate forwarded to ``da.get_cost_updates``.
        training_epochs: Number of passes over the minibatches.
        batch_size: Rows per minibatch. NOTE: changing batch_size also
            requires changing ``b_s`` in ml.py.
        da_object: Optional previously built ``dA``. When given, the new
            ``dA`` is built from its ``n_rng``, ``theano_rng``, ``W``,
            ``b_prime`` and ``b`` instead of fresh random state.

    Returns:
        A ``(hiddeny, da)`` tuple. ``hiddeny`` is the concatenation of the
        compiled function's output ``y`` over all minibatches of the last
        epoch (an empty array when nothing was run); ``da`` is the ``dA``
        instance used for training.
    """
    train_set_x = theano.shared(numpy.array(train_set_x))

    # Number of complete minibatches. Floor division keeps this an int on
    # both Python 2 and Python 3; a trailing partial batch is skipped.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_hidden = train_set_x_feature_num * 3 // 4

    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # symbolic placeholder for one minibatch

    # Random state used for the denoising (corruption) step.
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # Building the dA only wires up the symbolic graph and the network
    # parameters; no concrete data is needed at this point.
    if da_object is None:
        da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
                n_visible=train_set_x_feature_num, n_hidden=n_hidden)
    else:
        # Rebuild around the new symbolic input while reusing the previous
        # object's random state and parameters.
        da = dA(numpy_rng=da_object.n_rng, theano_rng=da_object.theano_rng, input=x,
                n_visible=train_set_x_feature_num, n_hidden=n_hidden,
                W=da_object.W, bvis=da_object.b_prime, bhid=da_object.b)

    _cost, updates, y = da.get_cost_updates(corruption_level=0.3,
                                            learning_rate=learning_rate)

    # Compiled training step: takes a minibatch index, applies `updates`, and
    # returns `y` (not the cost).
    # NOTE(review): `y` is presumably the hidden-layer encoding of the batch;
    # confirm against dA.get_cost_updates.
    train_da = theano.function(
        [index], y, updates=updates,
        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]})

    # Defined up front so the return below works even with zero epochs.
    hiddeny = numpy.array([])
    for epoch in range(training_epochs):
        # Collect the outputs and concatenate once per epoch instead of
        # re-copying the growing array on every minibatch.
        batch_outputs = []
        for batch_index in range(n_train_batches):
            batch_outputs.append(train_da(batch_index))

            sys.stdout.write("step:%d" % (batch_index + 1) + ' / ' + "%d" % n_train_batches + "         %d%%" % ((batch_index + 1) * 100 // n_train_batches)  + '\r')
            sys.stdout.flush()

        # Only the last epoch's outputs are returned.
        if batch_outputs:
            hiddeny = numpy.concatenate(batch_outputs)
        else:
            hiddeny = numpy.array([])

    print('\n')
    return hiddeny, da
示例#2
0
    def __init__(
        self,
        rng,
        theano_rng,
        input,
        n_in,
        n_hidden,
        n_out,
        f_load_MLP,
        f_load_DA=None,
    ):
        """Stack a ``dA`` in front of an ``MLP`` and expose the dA's tensors.

        Args:
            rng: Random state passed to both ``dA`` (as ``numpy_rng``) and ``MLP``.
            theano_rng: Theano random stream passed to ``dA``.
            input: Symbolic input; stored as ``self.x`` and fed to the ``dA``.
            n_in: Not read by this constructor.
            n_hidden: Number of hidden units of the ``dA``.
            n_out: Not read by this constructor.
            f_load_MLP: Forwarded to ``MLP`` as ``f_load``.
            f_load_DA: Forwarded to ``dA`` as ``f_load``.

        NOTE(review): the visible layer size is fixed at 28 * 28 regardless of
        ``n_in`` -- confirm this is intended.
        """
        self.x = input

        autoencoder = dA(numpy_rng=rng,
                         theano_rng=theano_rng,
                         input=self.x,
                         n_visible=28 * 28,
                         n_hidden=n_hidden,
                         f_load=f_load_DA)
        self.da = autoencoder

        # The MLP consumes the autoencoder's output.
        self.mlp = MLP(rng=rng, input=autoencoder.output, f_load=f_load_MLP)

        # Parameters and output exposed by this object are those of the dA.
        self.params = autoencoder.params
        self.output = autoencoder.output

        # L1 / squared-L2 norms of the dA weight matrix.
        self.L1 = abs(autoencoder.W).sum()
        self.L2 = (autoencoder.W ** 2).sum()
def test_ndA(learning_rate=0.1):
    """Train a ``dA`` on CSV rows and score the remaining rows by reconstruction error.

    Reads ``datasets\\physMIMCsv.csv``, skips the header row, and takes columns
    64..174 (111 values) of every row as one sample. Each sample is min-max
    scaled with the per-column minimum and maximum seen over the whole file.
    The first 1750648 samples are used to train the autoencoder one sample at
    a time; for every later sample the mean squared reconstruction error is
    appended to a local list.

    Args:
        learning_rate: Learning rate passed to ``da.train`` as ``lr``.

    NOTE(review): the collected errors (``pred``) are not returned or used
    within the visible code.
    """
    rng = numpy.random.RandomState(123)

    # Running per-column extrema. ``numpy.inf`` is used because the
    # ``numpy.Inf`` alias was removed in NumPy 2.0.
    maxs = numpy.ones((111, )) * -numpy.inf
    mins = numpy.ones((111, )) * numpy.inf

    with open('datasets\\physMIMCsv.csv', 'rt') as csvin:
        reader = csv.reader(csvin, delimiter=',')
        data = list()
        for i, row in enumerate(reader):
            if i % 10000 == 0:
                print(i)
            if i > 0:  # not header
                x = numpy.asarray(row[64:175]).astype(float)
                # Wrapped in a list so each sample later becomes a 1 x 111 array.
                data.append([x])
                above = x > maxs
                below = x < mins
                maxs[above] = x[above]
                mins[below] = x[below]

    # Column ranges for min-max scaling. A constant column has range 0; use 1
    # there so the scaled value is 0 instead of NaN/inf.
    span = maxs - mins
    span[span == 0] = 1.0

    # construct dA
    # NOTE(review): samples have 111 features but n_visible is 112 -- confirm
    # against the dA implementation.
    da = dA(n_visible=112, n_hidden=30, rng=rng)

    pred = list()
    for i, x in enumerate(data):
        # Min-max scaling: subtract the column minimum, divide by the range.
        x[0] = (x[0] - mins) / span
        if i % 10000 == 0:
            print(i)
        if i < 1750648:
            da.train(input=numpy.array(x),
                     lr=learning_rate,
                     corruption_level=0.1)
        else:
            p = da.reconstruct(x)
            mse = ((p[0] - x[0])**2).mean(axis=0)
            pred.append(mse)
    def fit(self,X):
        """Train a denoising autoencoder on ``X`` and store it as ``self.da``.

        Sets ``self.n_visible`` from ``X.shape[1]``, loads ``X`` through
        ``self.load_data``, builds a ``dA`` on ``self.x`` and runs
        ``self.training_epochs`` passes of minibatch training. When
        ``self.verbose`` is set, the mean minibatch cost is printed after each
        epoch. The elapsed time is written to stderr at the end.

        Args:
            X: Training data; ``X.shape[1]`` is used as the number of visible units.
        """
        start_time = timeit.default_timer()
        self.n_visible = X.shape[1]

        train_set_x = self.load_data(X)

        # Number of minibatches, rounded up so a trailing partial batch is
        # still used. Using the raw sample count here would request slices
        # past the end of the data (empty minibatches) when batch_size > 1.
        n_samples = train_set_x.get_value(borrow=True).shape[0]
        n_train_batches = (n_samples + self.batch_size - 1) // self.batch_size

        # allocate symbolic variables for the data
        index = T.lscalar()    # index to a [mini]batch

        rng = numpy.random.RandomState(123)
        theano_rng = RandomStreams(rng.randint(2 ** 30))

        self.da = dA(
            numpy_rng=rng,
            theano_rng=theano_rng,
            input=self.x,
            n_visible=self.n_visible,
            n_hidden=self.n_hidden
        )

        cost, updates = self.da.get_cost_updates(
            corruption_level=self.corruption_level,
            learning_rate=self.learning_rate
        )

        # Compiled training step: minibatch index -> cost, applying `updates`.
        train_da = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                self.x: train_set_x[index * self.batch_size: (index + 1) * self.batch_size]
            }
        )

        # training...
        for epoch in range(self.training_epochs):
            c = [train_da(batch_index) for batch_index in range(n_train_batches)]

            if self.verbose:
                print('Training epoch %d, cost ' % epoch, numpy.mean(c))

        end_time = timeit.default_timer()
        training_time = (end_time - start_time)

        print(("The %d%% corruption code " % (self.corruption_level*100) +
               ' ran for %.2fm' % (training_time / 60.)), file=sys.stderr)
    def fit(self,X):
        """Fit a denoising autoencoder to ``X``; the model is kept in ``self.da``.

        ``X.shape[1]`` becomes ``self.n_visible``. The data is loaded via
        ``self.load_data`` and sliced into minibatches of ``self.batch_size``
        rows; training runs for ``self.training_epochs`` epochs. The mean cost
        per epoch is printed when ``self.verbose`` is true, and the total
        training time is reported on stderr.

        Args:
            X: Training data with features along axis 1.
        """
        start_time = timeit.default_timer()
        self.n_visible = X.shape[1]

        train_set_x = self.load_data(X)

        # Minibatch count (ceiling division keeps a final partial batch).
        # Iterating over the sample count instead would index past the data
        # and feed empty minibatches whenever batch_size > 1.
        n_samples = train_set_x.get_value(borrow=True).shape[0]
        n_train_batches = (n_samples + self.batch_size - 1) // self.batch_size

        # allocate symbolic variables for the data
        index = T.lscalar()    # index to a [mini]batch

        rng = numpy.random.RandomState(123)
        theano_rng = RandomStreams(rng.randint(2 ** 30))

        self.da = dA(
            numpy_rng=rng,
            theano_rng=theano_rng,
            input=self.x,
            n_visible=self.n_visible,
            n_hidden=self.n_hidden
        )

        cost, updates = self.da.get_cost_updates(
            corruption_level=self.corruption_level,
            learning_rate=self.learning_rate
        )

        # One call trains on the minibatch selected by `index` and returns its cost.
        train_da = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                self.x: train_set_x[index * self.batch_size: (index + 1) * self.batch_size]
            }
        )

        # training... (`range` instead of `xrange` so this runs on Python 3 too)
        for epoch in range(self.training_epochs):
            c = []
            for batch_index in range(n_train_batches):
                c.append(train_da(batch_index))

            if self.verbose:
                print('Training epoch %d, cost ' % epoch, numpy.mean(c))

        end_time = timeit.default_timer()
        training_time = (end_time - start_time)

        print(("The %d%% corruption code " % (self.corruption_level*100) +
               ' ran for %.2fm' % (training_time / 60.)), file=sys.stderr)
示例#6
0
    def fit(self, X):
        """Train a denoising autoencoder on ``X`` and keep it in ``self.da``.

        Sets ``self.n_visible`` from ``X.shape[1]``, loads the data with
        ``self.load_data`` and trains for ``self.training_epochs`` epochs over
        the complete minibatches of ``self.batch_size`` rows. When
        ``self.verbose`` is set, the mean minibatch cost of each epoch is printed.

        Args:
            X: Training data; ``X.shape[1]`` is the number of visible units.
        """
        self.n_visible = X.shape[1]

        train_set_x = self.load_data(X)

        # Number of complete minibatches. Floor division keeps the count an
        # int on Python 3 as well (plain `/` would give a float there); a
        # trailing partial batch is skipped, as before.
        n_train_batches = train_set_x.get_value(
            borrow=True).shape[0] // self.batch_size

        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch

        rng = numpy.random.RandomState(123)
        theano_rng = RandomStreams(rng.randint(2**30))

        self.da = dA(numpy_rng=rng,
                     theano_rng=theano_rng,
                     input=self.x,
                     n_visible=self.n_visible,
                     n_hidden=self.n_hidden)

        cost, updates = self.da.get_cost_updates(
            corruption_level=self.corruption_level,
            learning_rate=self.learning_rate)

        # Compiled training step: minibatch index -> cost, applying `updates`.
        train_da = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                self.x:
                train_set_x[index * self.batch_size:(index + 1) *
                            self.batch_size]
            })

        # training...
        for epoch in range(self.training_epochs):
            c = []
            for batch_index in range(n_train_batches):
                c.append(train_da(batch_index))

            if self.verbose:
                # A single pre-formatted string prints identically whether
                # `print` is the Python 2 statement or the print function.
                print('Training epoch %d, cost  %s' % (epoch, numpy.mean(c)))
def test_dA(learning_rate=0.1, corruption_level=0.0, training_epochs=50):
    """Train a small ``dA`` (20 visible, 7 hidden units) on one fixed sample.

    Args:
        learning_rate: Passed to ``da.train`` as ``lr``.
        corruption_level: Passed to ``da.train``.
        training_epochs: Number of ``da.train`` calls.
    """
    # Single training sample: ten ones followed by ten zeros.
    data = numpy.array([[1] * 10 + [0] * 10])

    # construct dA with a fixed seed
    da = dA(input=data,
            n_visible=20,
            n_hidden=7,
            rng=numpy.random.RandomState(123))

    # train
    for _ in range(training_epochs):
        da.train(lr=learning_rate, corruption_level=corruption_level)

    # test inputs (built here but not consumed within the visible code)
    first_row = [1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1]
    second_row = [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0]
    x = numpy.array([first_row, second_row])
示例#8
0
    def __init__(
        self,
        rng,
        theano_rng,
        input,
        n_in,
        n_hidden,
        n_out,
        f_load_MLP,
        f_load_DA=None,
    ):
        """Build a ``dA`` on ``input`` and an ``MLP`` on top of the dA's output.

        Args:
            rng: Random state given to ``dA`` (``numpy_rng``) and to ``MLP``.
            theano_rng: Theano random stream given to ``dA``.
            input: Symbolic input, kept as ``self.x``.
            n_in: Not used by this constructor.
            n_hidden: Hidden-layer size of the ``dA``.
            n_out: Not used by this constructor.
            f_load_MLP: Passed to ``MLP`` as ``f_load``.
            f_load_DA: Passed to ``dA`` as ``f_load``.

        NOTE(review): ``n_visible`` is hard-coded to 28 * 28 and ``n_in`` is
        ignored -- confirm this is intended.
        """
        self.x = input

        da_kwargs = {
            'numpy_rng': rng,
            'theano_rng': theano_rng,
            'input': self.x,
            'n_visible': 28 * 28,
            'n_hidden': n_hidden,
            'f_load': f_load_DA,
        }
        self.da = dA(**da_kwargs)

        # The MLP reads the dA's output as its input.
        self.mlp = MLP(rng=rng, input=self.da.output, f_load=f_load_MLP)

        # Only the dA's parameters and output are exposed on this object.
        self.params = self.da.params
        self.output = self.da.output

        # L1 norm and squared L2 norm of the dA weights.
        self.L1 = abs(self.da.W).sum()
        self.L2 = (self.da.W**2).sum()