def _plot_clusters(X, y):
    """Scatter-plot the first two columns of ``X``, coloured by label ``y``.

    Args:
        X: Sequence of samples; converted with ``np.array`` and indexed as
            ``[:, 0]`` / ``[:, 1]``, so each sample needs at least two
            features.
        y: Integer labels used to index into the colour palette (one label
            per sample).

    NOTE(review): ``pl`` is assumed to be matplotlib's pylab/pyplot
    interface imported elsewhere in this file -- confirm.
    """
    # Build a long palette of single-character colour codes by repeating
    # the base string, so every label value can be used as an index.
    palette = np.array(list('bgrcmykbgrcmykbgrcmykbgrcmyk'))
    palette = np.hstack([palette] * 20)

    points = np.array(X)
    pl.scatter(points[:, 0], points[:, 1], color=palette[y].tolist(), s=10)
    pl.show()


def main():
    """Demonstrate Tomek-link removal on two artificial clusters.

    Creates the clusters via ``Tomeklink.createCluster``, plots them,
    detects and removes the Tomek links, then plots the cleaned data again.
    """
    # Create 2 artificial clusters that partially overlap
    X, y = Tomeklink.createCluster()
    # Parenthesised form prints the same on Python 2 and is valid Python 3.
    print(y)

    # Plot the clusters
    _plot_clusters(X, y)

    # Detect the TomekLinks in the data
    tomeklinks = Tomeklink.detectTomekLinks(X, y)

    # Remove the TomekLinks from the data
    X, y = Tomeklink.removeTomekLinks(tomeklinks, X, y)

    # Plot the data again with the TomekLinks removed
    _plot_clusters(X, y)
# Oversample the unhealthy rows with SMOTE.
# NOTE(review): the arguments 1000 and 5 are presumably the oversampling
# percentage and neighbour count -- confirm against smote.SMOTE.
oversample = smote.SMOTE(np.array(unhealthyMatrix), 1000, 5)

# Training set = healthy rows followed by the oversampled unhealthy rows.
final = np.concatenate((healthyMatrix, oversample), axis=0)

# Split each row for the SVM: column 0 is the label, the rest are features.
X = [record[1:] for record in final]
Y = [record[0] for record in final]

# Detect the TomekLinks in the data, then remove them.
tomeklinks = Tomeklink.detectTomekLinks(X, Y)
X, Y = Tomeklink.removeTomekLinks(tomeklinks, X, Y)

# Fit the classifier on the cleaned training data.
clf = svm.SVC()
clf.fit(X, Y)

# Load the evaluation rows from path2 (no header row) as plain lists.
data = pd.read_table(path2, header=None)
test = [series.tolist() for _, series in data.iterrows()]

# Same layout as the training rows: label first, features after.
X_test = [record[1:] for record in test]
rightAns = [record[0] for record in test]