def _scaleMarkerSizes(weights, mult=20, offset=2):
    """Turn sample weights into scatter-plot marker sizes.

    The smallest weight maps to ``offset``; every other weight grows linearly
    from there by a factor of ``mult``.

    Args:
        weights: array-like exposing ``.min()`` and element-wise arithmetic.
        mult: multiplier applied to the distance from the minimum weight.
        offset: marker size given to the minimum weight.

    Returns:
        An object of the same kind as ``weights`` holding the marker sizes.
    """
    return mult * (weights - weights.min()) + offset


def plotManifolds(all_train_x, all_train_y, all_test_x, trainSampleWeights, testSampleWeights, n_jobs=23):
    """Embed the train and test sets with several manifold learners and plot them.

    For each of LLE, MDS, Isomap and Spectral Embedding, a fresh copy of the
    transformer is fitted independently on the training features and on the
    test features.  The first two embedding coordinates are scattered into one
    shared figure: training points are coloured by class (0 -> blue "died",
    1 -> red "survived"), test points are grey.  Marker sizes are derived from
    the sample weights.  The figure is shown once all methods have been drawn.

    Args:
        all_train_x: training feature matrix passed to ``fit_transform``.
        all_train_y: training labels; compared against 0 and 1 to build the
            per-class masks.
        all_test_x: test feature matrix passed to ``fit_transform``.
        trainSampleWeights: one weight per training row (controls marker size).
        testSampleWeights: one weight per test row (controls marker size).
        n_jobs: forwarded to ``MDS`` only.

    Returns:
        None.  Prints progress per method and opens a plot window.
    """
    trainSizes = _scaleMarkerSizes(trainSampleWeights)
    testSizes = _scaleMarkerSizes(testSampleWeights)

    # ------ transform and plot ------
    n_neighbors = 10
    transformers = [
        ('LLE', LocallyLinearEmbedding(n_neighbors=n_neighbors, method='standard')),
        ('MDS', MDS(n_jobs=n_jobs)),
        ('Isomap', Isomap(n_neighbors=n_neighbors)),
        ('Spectral Embedding', SpectralEmbedding(n_neighbors=n_neighbors)),
    ]
    # (label value, legend text, colour) in drawing order.
    classStyles = [(0, 'died', 'blue'), (1, 'survived', 'red')]

    fig = pylab.figure()
    numCols = len(transformers)
    for i, (name, transformer) in enumerate(transformers):
        # Single-argument form prints the same text under Python 2 and 3.
        print('--- %s ---' % name)

        # transform: deepcopy keeps the template transformer unfitted so the
        # train and test embeddings are fitted independently of each other.
        t0 = time()
        traindata = deepcopy(transformer).fit_transform(all_train_x)
        testdata = deepcopy(transformer).fit_transform(all_test_x)
        printDoneTime(t0)

        # plot: cells 2*i+1 and 2*i+2 of the 2 x numCols grid, so each method's
        # training and test panels are adjacent.
        ax = fig.add_subplot(2, numCols, 2 * i + 1)
        for surv, label, color in classStyles:
            ind = all_train_y == surv
            # Sizes must be masked with the same selector as the points;
            # otherwise they are misaligned with (or a different length from)
            # the plotted coordinates.
            ax.scatter(traindata[ind, 0], traindata[ind, 1], c=color, alpha=0.7,
                       s=trainSizes[ind], linewidths=0.5, label=label)
        ax.set_title(name + " (Training Data)")
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        ax.legend(loc="best")
        ax.axis('tight')

        ax = fig.add_subplot(2, numCols, 2 * i + 2)
        ax.scatter(testdata[:, 0], testdata[:, 1], c="grey", s=testSizes)
        ax.set_title(name + " (Test Data)")
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        ax.axis('tight')

    pylab.show()
# --- Disabled experiment: fit `pipe` with one hand-picked parameter set and
# --- print its accuracy on a cached split.
# trainX, trainY, testX, testY = loadObject("H:/allcvdata")[1]
# params = getParamsFromIndices([2, 2, 1, 0, 2], allParamsDict)
# pipe.set_params(**params)
# pipe.fit(trainX, trainY)
# print 'here'
# print accuracy_score(testY, pipe.predict(testX))

# Start the clock for the run below; reported by printDoneTime at the end.
t0 = time()

# --- Disabled experiment: call fitClfWithGridSearch directly and print the score.
# newpipe, best_params, score = fitClfWithGridSearch(
#     name, pipe, allParamsDict, data, os.path.join(rootdir, 'intermediate results'),
#     n_jobs=20, cvSplitNum=10, random_state=random_state, useJJ=useJJ, verbosity=2,
#     # maxLearningSteps=30, numConvergenceSteps=4, eliteProportion=0.1, saveCache=True,
#     parentsProportion=0.4, populationSize=15,
#     mutationProbability=0.3, mutationProportion=0.2, mutationStdDev=None, maxDuplicateProportion=0)
# print 'SCORE =', score
# print 'x'*50

# Active run: build the model with the SVC classifier only.
buildModel(
    data,
    testData,
    fieldMaps,
    selectedClfs=['svc'],
    useJJ=useJJ,
    n_jobs=20,
    writeResults=True,
    colNames=['sex', 'name', 'embarked'],
    cvNumSplits=10,
    random_states=random_states,
    verbose=True,
    maxLearningSteps=30,
    numConvergenceSteps=5,
    eliteProportion=0.1,
    saveCache=True,
    parentsProportion=0.4,
    populationSize=12,
    verbosity=2,
    mutationProbability=0.3,
    mutationProportion=0.2,
    mutationStdDev=None,
    maxDuplicateProportion=0,
)
printDoneTime(t0)
print('>>>> FIN <<<<')