Example #1
    def assessWithoutDist(self):
        print "reading data..."
        pictures = readCsv(self.dataBaseUrl + "data", self.totalNum)

        dao = ImageDao()
        imgs = dao.getAll()
        typeDict = {}
        for img in imgs:
            typeDict[img.imgId] = img.imgType

        print "training..."
        trainSet = pictures[:self.trainNum]
        self.knn.train(trainSet)

        testSet = pictures[self.trainNum:self.totalNum]

        accuracyList = []
        heads = ['distance', 'accuracy', 'averageK']

        print "predicting..."
        for d in range(2000, 4000, 20):
            accuracy, avgK = self.knn.predictForManyWithDist(
                testSet, self.trainNum, d, typeDict)

            item = [d, accuracy, avgK]
            accuracyList.append(item)

            print "distance:%d     accuracy:%f%%       averageK:%f" % (
                d, accuracy * 100, avgK)

        saveCsv(self.resultBasePath + 'assessDist_Radius10_5000-1000.csv',
                heads, accuracyList)
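All of these examples call `readCsv` and `saveCsv` helpers that are not shown. A minimal sketch of what such helpers might look like, assuming `readCsv` loads the first N rows of `<base>.csv` into a float numpy array and `saveCsv` writes a header row plus the result rows; the `.csv` suffix and row layout are guesses, not the repo's actual implementation:

import csv
import itertools

import numpy as np


def readCsv(basePath, numRows):
    # Hypothetical helper: load the first numRows rows of <basePath>.csv,
    # one image (or feature vector) per CSV row.
    with open(basePath + '.csv', 'rb') as f:
        rows = list(itertools.islice(csv.reader(f), numRows))
    return np.array(rows, dtype=float)


def saveCsv(fileName, heads, datas):
    # Hypothetical helper: write a header row followed by the data rows.
    with open(fileName, 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(heads)
        writer.writerows(datas)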
Example #2
    def train(self):
        # Prediction
        self.y_pred = self.decoder_op
        # Targets (Labels) are the input data.
        y_true = self.X

        # Define loss and optimizer: minimize the root-mean-squared error.
        # Use tf.sqrt rather than **(1 / 2); in Python 2, 1 / 2 == 0, so the
        # original exponent silently turned the cost into a constant.
        epsilon = 10e-6
        cost = tf.sqrt(
            tf.reduce_mean(tf.pow(y_true - self.y_pred, 2)) + epsilon)
        """

        predictions=self.y_pred
        float_labels = tf.cast(y_true, tf.float32)
        cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
                                                                                1 - float_labels) * tf.log(
            1 - predictions + epsilon)
        cross_entropy_loss = tf.negative(cross_entropy_loss)
        cost=tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
        """
        optimizer = tf.train.RMSPropOptimizer(
            self.learning_rate).minimize(cost)

        # Initializing the variables
        init = tf.global_variables_initializer()

        saver = tf.train.Saver()
        self.sess.run(init)

        costs = []
        total_batch = int(self.dataSet.shape[0] / self.batch_size)
        # Training cycle
        for epoch in range(self.training_epochs):
            # Loop over all batches
            for i in range(total_batch):
                batch_xs = self.dataSet[i * self.batch_size:(i + 1) *
                                        self.batch_size]
                # Run optimization op (backprop) and cost op (to get loss value)
                _, c = self.sess.run([optimizer, cost],
                                     feed_dict={self.X: batch_xs})
                # Display logs per epoch step
                if i % self.display_step == 0:
                    print "Epoch:%04d" % (epoch + 1), "batch:%04d" % (
                        i + 1), "cost=", "{:.9f}".format(c)
                    val = [(epoch + 1), i, c]
                    costs.append(val)
        saver.save(self.sess, 'network/model/model.ckpt')
        heads = ['epoch', 'batch', 'cost']
        saveCsv(fileName="network/netCost.csv", heads=heads, datas=costs)

        print("Optimization Finished!")
Example #3
    def assessWithoutRadius(self, k=None):
        if not k:
            k = self.k
        accuracyList = []
        heads = ['radius', 'accuracy', 'averageCriticalDist']

        for radius in range(5, 15):
            print radius, ':'
            print "blurring ..."
            data = MyData()
            data.saveCsvWithGaussianBlur(radius=radius)

            dao = ImageDao()
            imgs = dao.getAll()
            typeDict = {}
            for img in imgs:
                typeDict[img.imgId] = img.imgType

            # Repeat each test image's ground-truth label k times by
            # broadcasting, giving a (testNum, k) label matrix.
            zerNp = np.zeros([k, self.testNum])
            testLabel = np.arange(self.trainNum, self.totalNum)
            for i in range(len(testLabel)):
                testLabel[i] = typeDict[str(testLabel[i]).zfill(5)]
            testLabel = (zerNp + testLabel).astype('int').T

            pictures = readCsv(self.dataBaseUrl + "data", self.totalNum)

            print "training..."
            trainSet = pictures[:self.trainNum]
            self.knn.train(trainSet)

            testSet = pictures[self.trainNum:self.totalNum]

            print "predicting..."
            accuracy, avgCriDist = self.knn.predictForManyWithK(
                testSet, testLabel, k, typeDict)

            item = [radius, accuracy, avgCriDist]
            accuracyList.append(item)

            print "k:%d     radius:%f    accuracy:%f%%   averageCriticalDist:%f" % (
                k, radius, accuracy * 100, avgCriDist)

        saveCsv(self.resultBasePath + 'assessRadiusK' + str(k) + '.csv', heads,
                accuracyList)
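The `(zerNp + testLabel).astype('int').T` line builds the (testNum, k) matrix via numpy broadcasting, so each image's ground-truth label is repeated k times and can be compared elementwise against the k neighbour votes. A toy standalone version of the same trick:

import numpy as np

# 4 test images, k = 3: repeat each ground-truth label k times per row.
k, testNum = 3, 4
labels = np.array([7, 1, 7, 2])
tiled = (np.zeros([k, testNum]) + labels).astype('int').T
print tiled
# [[7 7 7]
#  [1 1 1]
#  [7 7 7]
#  [2 2 2]]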
Example #4
    def assessFeaWithBlurWithoutK(self):
        print "reading neaFeaData..."
        netFea = readCsv(self.dataBaseUrl + "netFea", self.totalNum) * 256 * 40
        print "reading data..."
        pictures = readCsv(self.dataBaseUrl + "data", self.totalNum)
        pictures = np.append(pictures, netFea, axis=1)
        print pictures.shape
        dao = ImageDao()
        imgs = dao.getAll()
        typeDict = {}
        for img in imgs:
            typeDict[img.imgId] = img.imgType

        print "training..."
        trainSet = pictures[:self.trainNum]
        self.knn.train(trainSet)

        testSet = pictures[self.trainNum:self.totalNum]

        accuracyList = []
        heads = ['k', 'accuracy', 'averageCriticalDist']

        print "predicting..."
        for k in range(1, 101):
            zerNp = np.zeros([k, self.testNum])
            testLabel = np.arange(self.trainNum, self.totalNum)
            for i in range(len(testLabel)):
                testLabel[i] = typeDict[str(testLabel[i]).zfill(5)]
            testLabel = (zerNp + testLabel).astype('int').T

            accuracy, avgCriDist = self.knn.predictForManyWithK(
                testSet, testLabel, k, typeDict)

            item = [k, accuracy, avgCriDist]
            accuracyList.append(item)

            print "k:%d     accuracy:%f%%       averageCriticalDist:%f" % (
                k, accuracy * 100, avgCriDist)

        saveCsv(self.resultBasePath + 'assessFeaWithBlurWithoutK.csv', heads,
                accuracyList)
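The `* 256 * 40` scaling matters because the kNN distances are computed over the concatenated columns: raw pixels live in [0, 255] while network features live in roughly [0, 1], so unscaled features would barely move the Euclidean distance. A toy illustration (made-up numbers, not the repo's data):

import numpy as np

# Two images: raw pixels in [0, 255], net features in [0, 1].
pixels = np.array([[200., 10.], [190., 20.]])
netFea = np.array([[0.9], [0.1]])

plain = np.append(pixels, netFea, axis=1)
scaled = np.append(pixels, netFea * 256 * 40, axis=1)

print np.linalg.norm(plain[0] - plain[1])    # ~14.2: pixels dominate
print np.linalg.norm(scaled[0] - scaled[1])  # ~8192: features now matter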
Example #5
def test_cA(train_set_x,
            n_hidden=None,
            learning_rate=0.01,
            training_epochs=20,
            batch_size=10,
            output_folder='network',
            contraction_level=.1):
    """
    learning_rate:梯度下降法的学习率

    training_epochs: 最大迭代次数

    contraction_level:为正则项的权重

    """

    # Number of minibatches for batch gradient descent
    n_train_batches = train_set_x.shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index of the current minibatch
    x = T.matrix('x')  # one minibatch of training data

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    rng = numpy.random.RandomState(123)

    ca = AutoEnc_Theano(numpy_rng=rng,
                        input=x,
                        n_visible=50 * 50 * 3,
                        n_hidden=n_hidden,
                        n_batchsize=batch_size)

    cost, updates = ca.get_cost_updates(contraction_level=contraction_level,
                                        learning_rate=learning_rate)
    # Per-minibatch training function, driven by the minibatch index
    train_ca = theano.function(
        [index], [T.mean(ca.L_rec), ca.L_jacob],
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    start_time = timeit.default_timer()

    ############
    # TRAINING #
    ############

    costs = []
    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through training set
        c = []
        for batch_index in xrange(n_train_batches):
            print 'epoch:%d    batch_index:%d' % (epoch, batch_index)
            c.append(train_ca(batch_index))

        c_array = numpy.vstack(c)
        # Average the per-batch costs over the epoch: column 0 holds the
        # reconstruction cost, column 1 the Jacobian penalty (indexing rows
        # here, as the original did, would only look at the first batches).
        cost = [
            epoch,
            numpy.mean(c_array[:, 0]),
            numpy.mean(numpy.sqrt(c_array[:, 1]))
        ]
        costs.append(cost)
        print 'Training epoch %d, reconstruction cost ' % cost[0], cost[1], \
            ' jacobian norm ', cost[2]

    heads = ['epoch', 'cost', 'jacobian']
    saveCsv(fileName="layer_1Cost.csv", heads=heads, datas=costs)

    end_time = timeit.default_timer()

    training_time = (end_time - start_time)

    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((training_time) / 60.))

    image = Image.fromarray(
        tile_raster_images(X=ca.W.get_value(borrow=True).T,
                           img_shape=(50, 50 * 3),
                           tile_shape=(10, 10),
                           tile_spacing=(1, 1)))

    image.save('cae_filters.png')
    numpy.savetxt('layer_1W.csv', ca.W.get_value(borrow=True), delimiter=',')
    os.chdir('../')
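For reference, the quantity `ca.L_jacob` tracks is the contractive penalty: for a sigmoid encoder h = s(xW + b), the Jacobian dh/dx has entries h_j(1 - h_j)W_ij, and the penalty is its squared Frobenius norm averaged over the minibatch. A numpy sketch of that computation under those standard assumptions (shapes and initialization are illustrative, not the repo's values):

import numpy


def contraction_penalty(x, W, b):
    # Contractive penalty for a sigmoid encoder h = s(x.dot(W) + b):
    # squared Frobenius norm of dh/dx, averaged over the minibatch.
    h = 1.0 / (1.0 + numpy.exp(-(x.dot(W) + b)))  # (batch, n_hidden)
    # dh_j/dx_i = h_j * (1 - h_j) * W_ij, so the squared norm factorizes
    # into sum_j (h_j * (1 - h_j))^2 * sum_i W_ij^2.
    grad = (h * (1 - h)) ** 2                     # (batch, n_hidden)
    return numpy.mean(grad.dot((W ** 2).sum(axis=0)))


rng = numpy.random.RandomState(123)
x = rng.rand(10, 6)        # toy minibatch: 10 examples, 6 visible units
W = rng.randn(6, 4) * 0.1  # 4 hidden units
b = numpy.zeros(4)
print contraction_penalty(x, W, b)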