Пример #1
0
    def Mutate(ds, mProb=0.1, mCount=1, isCount=False, mad=0.0):
        """
        Mutate the class labels (last column) of ``ds`` in place and return ``ds``.

        With probability ``mProb`` the mutation is applied; otherwise ``ds`` is
        returned untouched. When applied, up to ``mCount`` not-yet-mutated rows are
        drawn at random. For each drawn row a new label is computed and written to
        that row and to every row reported as similar to it, so repeated instances
        of the same data keep a consistent label.

        Parameters:
            ds: 2-D array-like indexed as ``ds[row, col]``; the label is read from
                and written to column ``-1``. Modified in place.
            mProb: probability threshold compared against one ``np.random.rand()`` draw.
            mCount: number of rows (together with their similar rows) to mutate.
            isCount: if true, the label is shifted by ``int(np.random.randn() * mad)``
                and clamped at 0; otherwise it is replaced by ``1 - label``.
            mad: multiplier applied to the standard-normal draw when ``isCount`` is
                true (presumably a median absolute deviation -- TODO confirm).

        Returns:
            The same ``ds`` object that was passed in.
        """

        if np.random.rand() > mProb:
            return ds

        rowCount = len(ds)
        if rowCount == 0:
            # Nothing to mutate; np.random.randint(0, 0) below would raise ValueError.
            return ds

        mutated = set()
        done = 0

        while done < mCount:

            pick = np.random.randint(0, rowCount)
            if len(mutated) == rowCount:
                # Every row has already been mutated once; stop early.
                return ds

            if pick in mutated:
                # Rejection sampling: draw again until an untouched row comes up.
                continue
            instLabel = ds[pick, -1]

            if isCount:
                # Non-binary labels: shift by a scaled normal draw, never below zero.
                shift = int(np.random.randn() * mad)
                classVal = instLabel + shift
                if classVal < 0:
                    classVal = 0
            else:
                # Binary labels: flip 0 <-> 1.
                classVal = 1 - instLabel

            # NOTE(review): assumes the helper returns the indexes of all rows equal
            # to row `pick`, including `pick` itself -- confirm against DPLIB.
            similar = DPLIB.FindAllSimilarInstancesIndexes(pick, ds)

            for rowIdx in similar:
                mutated.add(rowIdx)
                ds[rowIdx, -1] = classVal

            done += 1
        return ds
Пример #2
0
    def crossOver(ds1, ds2, fixedSize, isCount=False):
        """
        Cross over two datasets at one cut point per parent and return two children.

        A cut point is chosen for each parent, both parents are shuffled in place,
        and the children are assembled as::

            child1 = ds1[:point1] + ds2[point2:]
            child2 = ds2[:point2] + ds1[point1:]

        With ``fixedSize`` true both parents share the same cut point (drawn from
        ``range(len(ds1))``). Otherwise the cut points are drawn independently, so
        the children may differ in size from the parents; a parent with 4000 or
        more rows is always cut at its midpoint.

        Datasets may contain repeated instances of the same data row. This is
        especially important since data can come from multiple sources and earlier
        mutations may have changed the label of individual copies. After the
        exchange, every group of similar rows in each child is therefore given one
        consistent label: the majority vote (mean >= 0.5 -> 1, else 0) for binary
        labels, or the mean clamped at 0 when ``isCount`` is true.

        Parameters:
            ds1, ds2: 2-D NumPy arrays whose last column holds the label. Both are
                shuffled in place (row order changes for the caller).
            fixedSize: use a single shared cut point for both parents.
            isCount: treat labels as non-binary values instead of 0/1.

        Returns:
            Tuple ``(child1, child2)`` of newly allocated arrays.

        Raises:
            ValueError: propagated from ``np.random.randint`` when ``ds1`` is empty,
                or when ``ds2`` is empty and ``fixedSize`` is false.
        """

        def _unifyLabels(child):
            # Give every group of similar rows in `child` one shared label (in place).
            visited = set()
            for row in range(len(child)):
                if row in visited:
                    continue
                # NOTE(review): assumes the helper returns the indexes of all rows
                # equal to `row`, including `row` itself -- confirm against DPLIB.
                group = list(DPLIB.FindAllSimilarInstancesIndexes(row, child))
                visited.update(group)

                label = sum(child[idx, -1] for idx in group) / len(group)
                if not isCount:
                    # Majority vote; ties go to 1.
                    label = 1 if label >= 0.5 else 0
                elif label < 0:
                    label = 0

                for idx in group:
                    child[idx, -1] = label

        point1 = np.random.randint(len(ds1))
        if fixedSize:
            point2 = point1
        else:
            point2 = np.random.randint(len(ds2))

            # Large parents are always split in half rather than at a random point.
            if len(ds1) >= 4000:
                point1 = len(ds1) // 2
            if len(ds2) >= 4000:
                point2 = len(ds2) // 2

        # Shuffle so the cut exchanges random subsets of rows. This reorders the
        # caller's arrays in place.
        np.random.shuffle(ds1)
        np.random.shuffle(ds2)

        # concatenate always allocates, so the children never alias the parents.
        ds1c = np.concatenate((ds1[:point1, :], ds2[point2:, :]), axis=0)
        ds2c = np.concatenate((ds2[:point2, :], ds1[point1:, :]), axis=0)

        _unifyLabels(ds1c)
        _unifyLabels(ds2c)

        return ds1c, ds2c