예제 #1
0
def plotManifolds(all_train_x, all_train_y, all_test_x, trainSampleWeights, testSampleWeights, n_jobs=23):
    """
    Embed the training and test sets with several manifold learners and scatter-plot them.

    For every transformer (LLE, MDS, Isomap, Spectral Embedding) two independent
    copies are fitted -- one on ``all_train_x`` and one on ``all_test_x`` -- so the
    training and test embeddings do not share a coordinate system. The first two
    embedding coordinates are plotted; training points are coloured by label
    (1 -> red/"survived", 0 -> blue/"died"), test points are grey. The figure is
    displayed with ``pylab.show()``; nothing is returned.

    :param all_train_x: training feature matrix handed to ``fit_transform``
    :param all_train_y: training labels, compared element-wise against 0 and 1 to
        build a boolean mask (assumes an array-like supporting ``== scalar`` -- confirm with caller)
    :param all_test_x: test feature matrix handed to ``fit_transform``
    :param trainSampleWeights: per-training-sample weights used as marker sizes; must
        support ``.min()``, arithmetic and boolean-mask indexing (e.g. a numpy array)
    :param testSampleWeights: per-test-sample weights used as marker sizes
    :param n_jobs: forwarded to ``MDS`` only
    """
    # Rescale the weights into marker areas: the smallest weight maps to
    # size 2 and differences between weights are amplified by `mult`.
    mult = 20
    trainSampleWeights = mult*(trainSampleWeights - trainSampleWeights.min()) + 2
    testSampleWeights = mult*(testSampleWeights - testSampleWeights.min()) + 2

    # ------ transform and plot ------
    n_neighbors = 10
    transformers = [('LLE', LocallyLinearEmbedding(n_neighbors=n_neighbors, method='standard')),
                    ('MDS', MDS(n_jobs=n_jobs)),
                    ('Isomap', Isomap(n_neighbors=n_neighbors)),
                    ('Spectral Embedding', SpectralEmbedding(n_neighbors=n_neighbors))]

    # (label value, marker colour, legend text), drawn in this order
    classStyles = ((0, "blue", 'died'), (1, "red", 'survived'))

    fig = pylab.figure()

    for i, (name, transformer) in enumerate(transformers):
        # Single-argument form prints the same text under Python 2 and 3.
        print('--- %s ---' % name)

        # transform: fit separate copies so the template transformer stays unfitted
        t0 = time()
        traindata = deepcopy(transformer).fit_transform(all_train_x)
        testdata = deepcopy(transformer).fit_transform(all_test_x)
        printDoneTime(t0)

        # plot: training and test panels of one transformer occupy two
        # consecutive cells of the 2 x len(transformers) grid
        ax = fig.add_subplot(2, len(transformers), 2*i+1)
        for surv, color, label in classStyles:
            ind = all_train_y == surv
            # Marker sizes must be masked together with the points; passing the
            # full-length weight vector would misalign sizes with samples.
            ax.scatter(traindata[ind, 0], traindata[ind, 1], c=color, alpha=0.7,
                       s=trainSampleWeights[ind], linewidths=0.5, label=label)
        ax.set_title(name + " (Training Data)")
        # Embedding coordinates carry no interpretable units, so hide tick labels.
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        ax.legend(loc="best")
        ax.axis('tight')

        ax = fig.add_subplot(2, len(transformers), 2*i+2)
        ax.scatter(testdata[:, 0], testdata[:, 1], c="grey", s=testSampleWeights)
        ax.set_title(name + " (Test Data)")
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        ax.axis('tight')

    pylab.show()
예제 #2
0
파일: blah.py 프로젝트: jennyyuejin/Kaggle
    # trainX, trainY, testX, testY = loadObject("H:/allcvdata")[1]
    # params = getParamsFromIndices([2, 2, 1, 0, 2], allParamsDict)
    # pipe.set_params(**params)
    # pipe.fit(trainX, trainY)
    # print 'here'
    # print accuracy_score(testY, pipe.predict(testX))

    t0 = time()

    # newpipe, best_params, score = fitClfWithGridSearch(name, pipe, allParamsDict, data, os.path.join(rootdir, 'intermediate results'),
    #                                                    n_jobs=20, cvSplitNum=10, random_state=random_state, useJJ=useJJ, verbosity=2,
    #
    #                                                    maxLearningSteps=30, numConvergenceSteps=4, eliteProportion=0.1, saveCache=True,
    #                                                    parentsProportion=0.4, populationSize=15,
    #                                                    mutationProbability=0.3, mutationProportion=0.2, mutationStdDev=None, maxDuplicateProportion=0)
    # print 'SCORE =', score

    # print 'x'*50
    buildModel(data, testData, fieldMaps, selectedClfs=['svc'], useJJ=useJJ, n_jobs=20, writeResults=True,
              colNames=['sex', 'name', 'embarked'], cvNumSplits=10, random_states=random_states, verbose=True,

              maxLearningSteps=30, numConvergenceSteps=5, eliteProportion=0.1, saveCache=True,
              parentsProportion=0.4, populationSize=12, verbosity=2,
              mutationProbability=0.3, mutationProportion=0.2, mutationStdDev=None, maxDuplicateProportion=0)


    printDoneTime(t0)
    print '>>>> FIN <<<<'