示例#1
0
    def train(self, trainSet: InstanceList, parameters: Parameter):
        """
        Training algorithm for the linear discriminant analysis classifier (Introduction to Machine Learning, Alpaydin,
        2015).

        PARAMETERS
        ----------
        trainSet : InstanceList
            Training data given to the algorithm.
        parameters : Parameter
            Parameter of the Lda algorithm.
        """
        w0 = {}
        w = {}
        priorDistribution = trainSet.classDistribution()
        classLists = Partition(trainSet)
        covariance = Matrix(trainSet.get(0).continuousAttributeSize(), trainSet.get(0).continuousAttributeSize())
        for i in range(classLists.size()):
            averageVector = Vector(classLists.get(i).continuousAverage())
            classCovariance = classLists.get(i).covariance(averageVector)
            classCovariance.multiplyWithConstant(classLists.get(i).size() - 1)
            covariance.add(classCovariance)
        covariance.divideByConstant(trainSet.size() - classLists.size())
        covariance.inverse()
        for i in range(classLists.size()):
            Ci = classLists.get(i).getClassLabel()
            averageVector = Vector(classLists.get(i).continuousAverage())
            wi = covariance.multiplyWithVectorFromRight(averageVector)
            w[Ci] = wi
            w0i = -0.5 * wi.dotProduct(averageVector) + math.log(priorDistribution.getProbability(Ci))
            w0[Ci] = w0i
        self.model = LdaModel(priorDistribution, w, w0)
示例#2
0
class MatrixTest(unittest.TestCase):
    def setUp(self):
        self.small = Matrix(3, 3)
        for i in range(3):
            for j in range(3):
                self.small.setValue(i, j, 1.0)
        self.v = Vector(3, 1.0)
        self.large = Matrix(1000, 1000)
        for i in range(1000):
            for j in range(1000):
                self.large.setValue(i, j, 1.0)
        self.medium = Matrix(100, 100)
        for i in range(100):
            for j in range(100):
                self.medium.setValue(i, j, 1.0)
        self.V = Vector(1000, 1.0)
        self.vr = Vector(100, 1.0)
        self.random = Matrix(100, 100, 1, 10, 1)
        self.originalSum = self.random.sumOfElements()
        self.identity = Matrix(100)

    def test_ColumnWiseNormalize(self):
        mClone = self.small.clone()
        mClone.columnWiseNormalize()
        self.assertEqual(3, mClone.sumOfElements())
        MClone = self.large.clone()
        MClone.columnWiseNormalize()
        self.assertAlmostEqual(1000, MClone.sumOfElements(), 3)
        self.identity.columnWiseNormalize()
        self.assertEqual(100, self.identity.sumOfElements())

    def test_MultiplyWithConstant(self):
        self.small.multiplyWithConstant(4)
        self.assertEqual(36, self.small.sumOfElements())
        self.small.divideByConstant(4)
        self.large.multiplyWithConstant(1.001)
        self.assertAlmostEqual(1001000, self.large.sumOfElements(), 3)
        self.large.divideByConstant(1.001)
        self.random.multiplyWithConstant(3.6)
        self.assertAlmostEqual(self.originalSum * 3.6,
                               self.random.sumOfElements(), 4)
        self.random.divideByConstant(3.6)

    def test_DivideByConstant(self):
        self.small.divideByConstant(4)
        self.assertEqual(2.25, self.small.sumOfElements())
        self.small.multiplyWithConstant(4)
        self.large.divideByConstant(10)
        self.assertAlmostEqual(100000, self.large.sumOfElements(), 3)
        self.large.multiplyWithConstant(10)
        self.random.divideByConstant(3.6)
        self.assertAlmostEqual(self.originalSum / 3.6,
                               self.random.sumOfElements(), 4)
        self.random.multiplyWithConstant(3.6)

    def test_Add(self):
        self.random.add(self.identity)
        self.assertAlmostEqual(self.originalSum + 100,
                               self.random.sumOfElements(), 4)
        self.random.subtract(self.identity)

    def test_AddVector(self):
        self.large.addRowVector(4, self.V)
        self.assertEqual(1001000, self.large.sumOfElements(), 0.0)
        self.V.multiply(-1.0)
        self.large.addRowVector(4, self.V)
        self.V.multiply(-1.0)

    def test_Subtract(self):
        self.random.subtract(self.identity)
        self.assertAlmostEqual(self.originalSum - 100,
                               self.random.sumOfElements(), 4)
        self.random.add(self.identity)

    def test_MultiplyWithVectorFromLeft(self):
        result = self.small.multiplyWithVectorFromLeft(self.v)
        self.assertEqual(9, result.sumOfElements())
        result = self.large.multiplyWithVectorFromLeft(self.V)
        self.assertEqual(1000000, result.sumOfElements())
        result = self.random.multiplyWithVectorFromLeft(self.vr)
        self.assertAlmostEqual(self.originalSum, result.sumOfElements(), 4)

    def test_MultiplyWithVectorFromRight(self):
        result = self.small.multiplyWithVectorFromRight(self.v)
        self.assertEqual(9, result.sumOfElements())
        result = self.large.multiplyWithVectorFromRight(self.V)
        self.assertEqual(1000000, result.sumOfElements())
        result = self.random.multiplyWithVectorFromRight(self.vr)
        self.assertAlmostEqual(self.originalSum, result.sumOfElements(), 4)

    def test_ColumnSum(self):
        self.assertEqual(3, self.small.columnSum(randrange(3)))
        self.assertEqual(1000, self.large.columnSum(randrange(1000)))
        self.assertEqual(1, self.identity.columnSum(randrange(100)))

    def test_SumOfRows(self):
        self.assertEqual(9, self.small.sumOfRows().sumOfElements())
        self.assertEqual(1000000, self.large.sumOfRows().sumOfElements())
        self.assertEqual(100, self.identity.sumOfRows().sumOfElements())
        self.assertAlmostEqual(self.originalSum,
                               self.random.sumOfRows().sumOfElements(), 3)

    def test_RowSum(self):
        self.assertEqual(3, self.small.rowSum(randrange(3)))
        self.assertEqual(1000, self.large.rowSum(randrange(1000)))
        self.assertEqual(1, self.identity.rowSum(randrange(100)))

    def test_Multiply(self):
        result = self.small.multiply(self.small)
        self.assertEqual(27, result.sumOfElements())
        result = self.medium.multiply(self.medium)
        self.assertEqual(1000000.0, result.sumOfElements())
        result = self.random.multiply(self.identity)
        self.assertEqual(self.originalSum, result.sumOfElements())
        result = self.identity.multiply(self.random)
        self.assertEqual(self.originalSum, result.sumOfElements())

    def test_ElementProduct(self):
        result = self.small.elementProduct(self.small)
        self.assertEqual(9, result.sumOfElements())
        result = self.large.elementProduct(self.large)
        self.assertEqual(1000000, result.sumOfElements())
        result = self.random.elementProduct(self.identity)
        self.assertEqual(result.trace(), result.sumOfElements())

    def test_SumOfElements(self):
        self.assertEqual(9, self.small.sumOfElements())
        self.assertEqual(1000000, self.large.sumOfElements())
        self.assertEqual(100, self.identity.sumOfElements())
        self.assertEqual(self.originalSum, self.random.sumOfElements())

    def test_Trace(self):
        self.assertEqual(3, self.small.trace())
        self.assertEqual(1000, self.large.trace())
        self.assertEqual(100, self.identity.trace())

    def test_Transpose(self):
        self.assertEqual(9, self.small.transpose().sumOfElements())
        self.assertEqual(1000000, self.large.transpose().sumOfElements())
        self.assertEqual(100, self.identity.transpose().sumOfElements())
        self.assertAlmostEqual(self.originalSum,
                               self.random.transpose().sumOfElements(), 3)

    def test_IsSymmetric(self):
        self.assertTrue(self.small.isSymmetric())
        self.assertTrue(self.large.isSymmetric())
        self.assertTrue(self.identity.isSymmetric())
        self.assertFalse(self.random.isSymmetric())

    def test_Determinant(self):
        self.assertEqual(0, self.small.determinant())
        self.assertEqual(0, self.large.determinant())
        self.assertEqual(1, self.identity.determinant())

    def test_Inverse(self):
        self.identity.inverse()
        self.assertEqual(100, self.identity.sumOfElements())
        self.random.inverse()
        self.random.inverse()
        self.assertAlmostEqual(self.originalSum, self.random.sumOfElements(),
                               5)

    def test_Characteristics(self):
        vectors = self.small.characteristics()
        self.assertEqual(2, len(vectors))
        vectors = self.identity.characteristics()
        self.assertEqual(100, len(vectors))
        vectors = self.medium.characteristics()
        self.assertEqual(46, len(vectors))