def Mutate(ds, mProb=0.1, mCount=1, isCount=False, mad=0.0):
    """Randomly mutate the labels (last column) of ``ds`` in place.

    A single uniform draw decides whether any mutation happens at all
    (probability ``mProb``). When it does, up to ``mCount`` distinct rows
    are picked at random and their label is changed. Datasets may contain
    repeated copies of the same data row, so the new label is written to
    every row reported by ``DPLIB.FindAllSimilarInstancesIndexes`` for the
    picked row; this keeps identical rows on a consistent label.

    Args:
        ds: 2-D indexable dataset (``ds[row, -1]`` is the label).
        mProb: Probability that the mutation step runs.
        mCount: Number of row groups to mutate when the step runs.
        isCount: If true, the label is shifted by
            ``int(np.random.randn() * mad)`` and clamped at 0; otherwise
            the label is replaced by ``1 - label``.
        mad: Scale applied to the standard-normal draw when ``isCount``
            is true.

    Returns:
        The same ``ds`` object that was passed in (mutated in place).
    """
    if np.random.rand() > mProb:
        return ds

    mutated = set()
    done = 0
    while done < mCount:
        # Check for exhaustion *before* drawing: once every row has been
        # touched there is nothing left to pick, and for an empty dataset
        # np.random.randint(0, 0) would raise ValueError.
        if len(mutated) == len(ds):
            return ds

        pick = np.random.randint(0, len(ds))
        if pick in mutated:
            # Already changed in this call; draw again.
            continue

        label = ds[pick, -1]
        if isCount:
            # Non-binary labels: shift by a normally distributed amount,
            # never going below zero.
            new_label = label + int(np.random.randn() * mad)
            if new_label < 0:
                new_label = 0
        else:
            new_label = 1 - label

        # Apply the new label to the picked row and all of its duplicates
        # so identical rows never disagree.
        for idx in DPLIB.FindAllSimilarInstancesIndexes(pick, ds):
            mutated.add(idx)
            ds[idx, -1] = new_label
        done += 1

    return ds
def crossOver(ds1, ds2, fixedSize, isCount=False):
    """Recombine two datasets at a cut point and return the two children.

    Each parent is shuffled in place, then split at its cut point. The
    first child is the head of ``ds1`` followed by the tail of ``ds2``;
    the second child is the head of ``ds2`` followed by the tail of
    ``ds1``. With ``fixedSize`` both parents share one cut point;
    otherwise each parent gets its own cut point, so the children can
    differ in size from the parents.

    Datasets may contain repeated copies of the same data row. This is
    especially important because data can come from multiple sources and
    earlier mutations may have changed the label of only some copies.
    After recombination every group of identical rows (as reported by
    ``DPLIB.FindAllSimilarInstancesIndexes``) is given one label: the
    majority vote for binary labels, or the mean clamped at 0 when
    ``isCount`` is true.

    Args:
        ds1: First parent, 2-D array with the label in the last column.
            Shuffled in place.
        ds2: Second parent, same layout. Shuffled in place.
        fixedSize: Use the same cut point for both parents.
        isCount: Treat labels as counts (mean, floored at 0) instead of
            binary classes (majority vote).

    Returns:
        Tuple ``(ds1c, ds2c)`` of newly allocated child arrays.
    """

    def _unify_duplicate_labels(data):
        """Give each group of identical rows in ``data`` one shared label."""
        visited = set()
        for row in range(len(data)):
            if row in visited:
                continue
            group = list(DPLIB.FindAllSimilarInstancesIndexes(row, data))
            visited.update(group)

            lbl = sum(data[idx, -1] for idx in group) / len(group)
            if not isCount:
                # Majority vote; ties go to the positive class.
                lbl = 1 if lbl >= 0.5 else 0
            elif lbl < 0:
                lbl = 0

            for idx in group:
                data[idx, -1] = lbl

    size1 = len(ds1)
    size2 = len(ds2)

    # np.random.randint(0) raises ValueError, so an empty parent is cut
    # at 0 (it contributes no rows either way).
    point1 = np.random.randint(size1) if size1 else 0
    if fixedSize:
        point2 = point1
    else:
        point2 = np.random.randint(size2) if size2 else 0
        # Parents with 4000 or more rows are always cut at their midpoint.
        # NOTE(review): this override is kept inside the variable-size
        # branch; confirm the intended nesting.
        if size1 >= 4000:
            point1 = size1 // 2
        if size2 >= 4000:
            point2 = size2 // 2

    # Shuffling an empty array is a no-op, so skip it.
    if size1:
        np.random.shuffle(ds1)
    if size2:
        np.random.shuffle(ds2)

    # concatenate allocates new arrays, so the children never alias the
    # parents.
    ds1c = np.concatenate((ds1[:point1, :], ds2[point2:, :]), axis=0)
    ds2c = np.concatenate((ds2[:point2, :], ds1[point1:, :]), axis=0)

    _unify_duplicate_labels(ds1c)
    _unify_duplicate_labels(ds2c)

    return ds1c, ds2c