Пример #1
0
def heart(dataType):
    """Plot AdaBoost diagnostics for the data set selected by ``dataType``.

    Loads the train/test split through ``data.createData``, draws a validation
    curve over ``n_estimators`` and a learning curve on the training split,
    then fits the classifier and plots its confusion matrix on the test split.

    :param dataType: data set identifier forwarded to ``data.createData`` and
        used as the prefix of the curve titles
    """
    split = data.createData(dataType)
    xTrain, xTest = split.xTrain, split.xTest
    yTrain, yTest = split.yTrain, split.yTest

    curveTitle = '{0} Ada Boost'.format(dataType)
    booster = AdaBoostClassifier().set_params(algorithm='SAMME.R')

    # Sweep the ensemble size over 1, 11, 21, ..., 151.
    plotter.plotValidationCurve(booster,
                                xTrain,
                                yTrain,
                                'n_estimators',
                                list(range(1, 160, 10)),
                                graphTitle=curveTitle)
    plotter.plotLearningCurve(booster,
                              title=curveTitle,
                              xTrain=xTrain,
                              yTrain=yTrain)

    # The confusion matrix uses a fixed title rather than the curve title.
    booster.fit(xTrain, yTrain)
    plotter.plotConfusion(booster, 'Heart',
                          ['Diameter narrowing ', 'Diameter not narrowing'],
                          xTest, yTest)
Пример #2
0
    def verifyMessage(self, msg):
        '''
        Check the content of a received message and record its arrival. The message has to be one produced by
        the :func:`generateMessage` generator.

        Three bit vectors are kept per sender, each indexed by message number: received, duplicated and
        wrong content.

        :param msg: message to verify; ``msg[0]`` is the payload holding the ``sender`` and ``number`` fields
        '''
        payload = msg[0]
        sender = payload['sender']
        number = payload['number']
        # Rebuild the message expected for this sender/number and compare it with the payload.
        expected = data.createData(sender, self.size, number, self.house,
                                   self.queue)
        contentMatches = expected == payload

        if sender not in self.received:
            # Order of the vectors: received, duplicated, wrong content.
            self.received[sender] = tuple(
                BitVector(size=self.count) for _ in range(3))

        seen, duplicated, wrongContent = self.received[sender]
        if seen[number]:
            duplicated[number] = 1
        else:
            seen[number] = 1
        if not contentMatches:
            wrongContent[number] = 1
Пример #3
0
def heart(dataType):
    """Plot decision-tree diagnostics for the data set selected by ``dataType``.

    Steps, all on the training split returned by ``data.createData``:

    1. validation curve and learning curve over ``max_depth`` (1..19);
    2. validation curve over ``min_samples_leaf`` with ``max_depth`` fixed at 8;
    3. learning curve and, after fitting, a confusion matrix on the test
       split for the final tree (``max_depth=8``, ``min_samples_leaf=10``).

    :param dataType: data set identifier forwarded to ``data.createData`` and
        used as the prefix of the curve titles
    """
    package = data.createData(dataType)

    xTrain = package.xTrain
    xTest = package.xTest
    yTrain = package.yTrain
    yTest = package.yTest
    title = '{0} Decision Tree'.format(dataType)

    params = {
        'class_weight': None,
        'criterion': 'entropy',
        'max_features': None,
        'min_samples_leaf': 10,
        'splitter': 'best',
    }

    # --- depth sweep ---
    clf_tree = DecisionTreeClassifier(random_state=util.randState)
    clf_tree.set_params(**params)
    plotter.plotValidationCurve(clf_tree,
                                xTrain,
                                yTrain,
                                'max_depth',
                                list(range(1, 20)),
                                graphTitle=title + ' Max Depth ')
    # The suffix carries a leading space so it does not run into the model
    # name ("... Decision TreeMax Depth"), matching the validation-curve title.
    plotter.plotLearningCurve(clf_tree,
                              title=title + ' Max Depth ',
                              xTrain=xTrain,
                              yTrain=yTrain)

    # --- leaf-size sweep on a fresh tree with the depth fixed ---
    clf_tree = DecisionTreeClassifier(random_state=util.randState)
    clf_tree.set_params(**params)
    clf_tree.max_depth = 8
    plotter.plotValidationCurve(clf_tree,
                                xTrain,
                                yTrain,
                                'min_samples_leaf',
                                [10, 50, 75, 100],
                                graphTitle=title + ' Min Samples Leaf ')

    # --- final model: learning curve and confusion matrix ---
    clf_tree.min_samples_leaf = 10
    title = 'Heart'
    plotter.plotLearningCurve(clf_tree,
                              title=title,
                              xTrain=xTrain,
                              yTrain=yTrain)
    clf_tree.fit(xTrain, yTrain)
    plotter.plotConfusion(clf_tree, title,
                          ['Diameter narrowing ', 'Diameter not narrowing'],
                          xTest, yTest)
Пример #4
0
def run():
    """Call ``runAll`` for the adult and heart packages under each reduction.

    The packages are first passed to ``runAll`` unchanged (label 'None').
    For 'PCA', 'ICA', 'RCA' and 'FAMD' the ``xTrain`` attribute of each
    package is replaced by the output of the matching ``dimRedu.get*Data``
    call before ``runAll`` is invoked. Finally ``dimRedu.run`` is called for
    both data sets and 'done' is printed. Nothing is returned.
    """

    # The return value of this call is discarded.
    getClusterData('adult', 'PCA')
    # NOTE(review): both results are overwritten by the assignments right
    # below — confirm whether unprocess() is only wanted here for a side effect.
    adultPackage = unprocess(data.createData('adult'))
    heartPackage = unprocess(data.createData('heart'))

    adultPackage = data.createData('adult')
    heartPackage = data.createData('heart')

    topRange = 5
    adultClasses = ['>50K', '<=50K']
    heartClasses = ['Diameter narrowing ', 'Diameter not narrowing']

    # Baseline: packages as returned by createData.
    runAll(adultPackage, adultClasses, topRange, 'Adult', 'None')
    runAll(heartPackage, heartClasses, topRange, 'Heart', 'None')

    # PCA reuses the baseline packages (they are not reloaded first).
    adultPackage.xTrain = dimRedu.getPCAData(adultPackage.xTrain, 'Adult')
    heartPackage.xTrain = dimRedu.getPCAData(heartPackage.xTrain, 'Heart')
    runAll(adultPackage, adultClasses, topRange, 'Adult', 'PCA')
    runAll(heartPackage, heartClasses, topRange, 'Heart', 'PCA')

    # ICA and RCA each start from freshly created packages.
    adultPackage = data.createData('adult')
    heartPackage = data.createData('heart')
    adultPackage.xTrain = dimRedu.getICAData(adultPackage.xTrain, 'Adult')
    heartPackage.xTrain = dimRedu.getICAData(heartPackage.xTrain, 'Heart')
    runAll(adultPackage, adultClasses, topRange, 'Adult', 'ICA')
    runAll(heartPackage, heartClasses, topRange, 'Heart', 'ICA')

    adultPackage = data.createData('adult')
    heartPackage = data.createData('heart')
    adultPackage.xTrain = dimRedu.getRCAData(adultPackage.xTrain, 'Adult')
    heartPackage.xTrain = dimRedu.getRCAData(heartPackage.xTrain, 'Heart')
    runAll(adultPackage, adultClasses, topRange, 'Adult', 'RCA')
    runAll(heartPackage, heartClasses, topRange, 'Heart', 'RCA')

    # NOTE(review): unprocess() receives the packages whose xTrain was just
    # replaced by the RCA output (no reload in between) — confirm this is
    # intended before FAMD is applied.
    adultPackage = unprocess(adultPackage)
    heartPackage = unprocess(heartPackage)
    adultPackage.xTrain = dimRedu.getFAMDData(adultPackage.xTrain, 'Adult')
    heartPackage.xTrain = dimRedu.getFAMDData(heartPackage.xTrain, 'Heart')
    runAll(adultPackage, adultClasses, topRange, 'Adult', 'FAMD')
    runAll(heartPackage, heartClasses, topRange, 'Heart', 'FAMD')

    # NOTE(review): dimRedu.run is given the packages left over from the FAMD
    # step, not freshly created ones — confirm.
    dimRedu.run(heartPackage, 'Heart')
    dimRedu.run(adultPackage, 'Adult')

    print('done')
Пример #5
0
def getClusterData(dataType, clusterType):
    """Cluster the dimension-reduced training data into 4 groups.

    The training features of the ``dataType`` package are reduced with the
    ``dimRedu`` helper selected by ``clusterType`` and then clustered with
    ``KMeans(n_clusters=4)``.

    :param dataType: data set identifier forwarded to ``data.createData`` and
        to the ``dimRedu`` helper
    :param clusterType: 'PCA', 'ICA' or 'RCA'; any other value selects FAMD
    :return: the labels returned by ``KMeans.fit_predict`` on the reduced data
    """
    package = data.createData(dataType)

    if clusterType == 'PCA':
        x = dimRedu.getPCAData(package.xTrain, dataType)
    elif clusterType == 'ICA':
        x = dimRedu.getICAData(package.xTrain, dataType)
    elif clusterType == 'RCA':
        x = dimRedu.getRCAData(package.xTrain, dataType)
    else:
        # Every unrecognised clusterType falls back to FAMD.
        x = dimRedu.getFAMDData(package.xTrain, dataType)

    # fit_predict already yields the labels for x, so no second predict pass
    # over the same data is needed.
    km = KMeans(n_clusters=4)
    return km.fit_predict(x)
Пример #6
0
def adult(dataType):
    """Plot learning-curve and confusion-matrix diagnostics for an MLP.

    Builds an ``MLPClassifier`` with hidden layers ``(k, 7, 2)``, where ``k``
    is 70% of the column count of ``package.features``, and ``max_iter=150``.
    The learning curves are plotted on the training split; the classifier is
    then fitted and its confusion matrix plotted on the test split.

    :param dataType: data set identifier forwarded to ``data.createData`` and
        used as the prefix of the learning-curve title
    """
    package = data.createData(dataType)

    xTrain = package.xTrain
    xTest = package.xTest
    yTrain = package.yTrain
    yTest = package.yTest

    title = '{0} Neural Network'.format(dataType)

    # NOTE(review): earlier revisions defined solver settings here
    # (activation='relu', learning_rate='invscaling', solver='lbfgs') but never
    # passed them to the classifier, so it runs with library defaults apart
    # from the two arguments below — confirm that is intended.

    # Size of the first hidden layer: 70% of the number of feature columns.
    firstLayer = int(.7 * package.features.shape[1])
    clf = MLPClassifier(hidden_layer_sizes=(firstLayer, 7, 2), max_iter=150)

    # NOTE(review): other drivers call plotter.plotLearningCurve (singular);
    # confirm plotter really exposes plotLearningCurves.
    plotter.plotLearningCurves(clf, title=title, xTrain=xTrain, yTrain=yTrain)

    clf.fit(xTrain, yTrain)
    plotter.plotConfusion(clf, 'Adult', ['>50K', '<=50K'], xTest, yTest)
Пример #7
0
def run(dataType):
    """Load the ``dataType`` package and run the heart experiments on it.

    Only ``dataType == 'heart'`` triggers any work: ``heart`` and
    ``mlRoseHeart`` are both called with the iteration counts
    100, 120, ..., 3980. For any other value the package is still created
    but nothing else happens.

    :param dataType: data set identifier forwarded to ``data.createData``
    """
    package = data.createData(dataType)
    if dataType == 'heart':
        iterations = list(range(100, 4000, 20))
        heart(package, iterations)
        mlRoseHeart(package, iterations)
Пример #8
0
def adultFit(dataType):
    """Plot MLP fit time against ``max_iter`` for the ``dataType`` data set.

    For each iteration budget a fresh ``MLPClassifier`` with hidden layers
    ``(k, 7, 2)`` (``k`` = 70% of the column count of ``package.features``)
    is fitted on the training split and the elapsed time of ``fit`` is
    recorded. The timings are handed to ``plotter.plot``.

    :param dataType: data set identifier forwarded to ``data.createData`` and
        used as the prefix of the plot title
    """
    # perf_counter is monotonic and high-resolution, which makes it the right
    # clock for measuring durations (wall-clock time can jump).
    from time import perf_counter

    package = data.createData(dataType)
    xTrain = package.xTrain
    yTrain = package.yTrain

    title = '{0} Neural Network Fit Times'.format(dataType)

    params = {
        'activation': 'relu',
        'learning_rate': 'invscaling',
        'solver': 'lbfgs',
    }

    # Size of the first hidden layer: 70% of the number of feature columns.
    firstLayer = int(.7 * package.features.shape[1])

    param_range = [
        50, 75, 100, 125, 150, 175, 200, 225, 250, 275, 300, 400, 500, 600,
        700, 800
    ]

    times = []
    for maxIter in param_range:
        clf = MLPClassifier(hidden_layer_sizes=(firstLayer, 7, 2))
        clf.set_params(**params)
        clf.max_iter = maxIter
        start = perf_counter()
        clf.fit(xTrain, yTrain)
        times.append(perf_counter() - start)

    plotter.plot(param_range, times, 'max_iter', 'fit times', title)
Пример #9
0
def heart(dataType):
    """Plot k-nearest-neighbours diagnostics for the ``dataType`` data set.

    Draws a validation curve over ``n_neighbors`` (1..49), then a learning
    curve for a classifier with ``n_neighbors=12``, and finally fits that
    classifier and plots its confusion matrix on the test split.

    :param dataType: data set identifier forwarded to ``data.createData`` and
        used as the prefix of the curve titles
    """
    package = data.createData(dataType)
    xTrain, xTest = package.xTrain, package.xTest
    yTrain, yTest = package.yTrain, package.yTest

    # Constructed as before; the object is not read anywhere below.
    scoreList = util.ScoreList('K')
    curveTitle = '{0} KNN'.format(dataType)
    knnParams = {'algorithm': 'ball_tree', 'p': 1, 'weights': 'distance'}

    # Validation curve over the neighbour count.
    sweepClf = KNeighborsClassifier()
    sweepClf.set_params(**knnParams)
    plotter.plotValidationCurve(sweepClf,
                                xTrain,
                                yTrain,
                                'n_neighbors',
                                list(range(1, 50)),
                                graphTitle=curveTitle)

    # Learning curve and confusion matrix for the fixed neighbour count.
    finalClf = KNeighborsClassifier()
    finalClf.set_params(**knnParams)
    finalClf.n_neighbors = 12
    plotter.plotLearningCurve(finalClf,
                              title=curveTitle,
                              xTrain=xTrain,
                              yTrain=yTrain)
    finalClf.fit(xTrain, yTrain)
    plotter.plotConfusion(finalClf, 'Heart',
                          ['Diameter narrowing ', 'Diameter not narrowing'],
                          xTest, yTest)
Пример #10
0
    def verifyMessage(self, msg):
        '''
        Validate a received message and update the per-sender bookkeeping. The message has to be one
        produced by the :func:`generateMessage` generator.

        For every sender a tuple of three bit vectors (received, duplicated, wrong content) is kept,
        each indexed by message number.

        :param msg: message to verify; ``msg[0]`` holds the ``sender`` and ``number`` fields
        '''
        body = msg[0]
        sender, number = body['sender'], body['number']
        # Regenerate what this sender should have sent for this number.
        reference = data.createData(sender, self.size, number, self.house, self.queue)
        sameContent = reference == body

        if sender not in self.received:
            self.received[sender] = tuple(BitVector(size=self.count) for _ in range(3))

        receivedBits, duplicateBits, wrongBits = self.received[sender]
        if not receivedBits[number]:
            receivedBits[number] = 1  # first time received
        else:
            duplicateBits[number] = 1  # already seen before
        if not sameContent:
            wrongBits[number] = 1
Пример #11
0
def adult(dataType):
    """Plot SVM diagnostics (polynomial and RBF kernels) for ``dataType``.

    For the polynomial kernel a validation curve over ``degree`` (1..7) and a
    learning curve at ``degree=1`` are drawn. For the RBF kernel a validation
    curve over ``C`` and a learning curve at ``C=10`` are drawn; that RBF
    classifier is then fitted and its confusion matrix plotted on the test
    split.

    :param dataType: data set identifier forwarded to ``data.createData`` and
        used as the prefix of the curve titles
    """
    package = data.createData(dataType)
    xTrain, xTest = package.xTrain, package.xTest
    yTrain, yTest = package.yTrain, package.yTest

    baseTitle = '{0} SVM'.format(dataType)

    # --- polynomial kernel ---
    polyTitle = baseTitle + ' Poly degree '
    polyClf = createBaseSVC()
    polyClf.set_params(C=0.1, degree=1, gamma=50, kernel='poly')
    plotter.plotValidationCurve(polyClf, xTrain, yTrain, 'degree',
                                list(range(1, 8)), graphTitle=polyTitle)
    polyClf.degree = 1
    plotter.plotLearningCurve(polyClf, title=polyTitle,
                              xTrain=xTrain, yTrain=yTrain)

    # --- RBF kernel ---
    rbfClf = createBaseSVC()
    rbfClf.set_params(C=1000, gamma=0.01, kernel='rbf')
    plotter.plotValidationCurve(rbfClf, xTrain, yTrain, 'C',
                                [0.01, 0.05, 1, 10, 15],
                                graphTitle=baseTitle + ' RBF - C ')
    rbfClf.C = 10
    plotter.plotLearningCurve(rbfClf, title=baseTitle + ' RBF',
                              xTrain=xTrain, yTrain=yTrain)

    # Confusion matrix for the RBF model on the test split.
    rbfClf.fit(xTrain, yTrain)
    plotter.plotConfusion(rbfClf, 'Adult', ['>50K', '<=50K'], xTest, yTest)
Пример #12
0
def heart(dataType):
    """Plot SVM diagnostics (polynomial and RBF kernels) for ``dataType``.

    For the polynomial kernel a validation curve over ``degree`` (1..7) and a
    learning curve at ``degree=3`` are drawn. For the RBF kernel a validation
    curve over ``C`` and a learning curve at ``C=0.5`` are drawn; that RBF
    classifier is then fitted and its confusion matrix plotted on the test
    split.

    :param dataType: data set identifier forwarded to ``data.createData`` and
        used as the prefix of the curve titles
    """
    package = data.createData(dataType)
    xTrain, xTest = package.xTrain, package.xTest
    yTrain, yTest = package.yTrain, package.yTest

    baseTitle = '{0} SVM'.format(dataType)

    # --- polynomial kernel ---
    polyClf = createBaseSVC()
    polyClf.set_params(C=0.01, degree=3, gamma=10, kernel='poly')
    plotter.plotValidationCurve(polyClf, xTrain, yTrain, 'degree',
                                list(range(1, 8)),
                                graphTitle=baseTitle + ' Poly Degree ')
    polyClf.degree = 3
    plotter.plotLearningCurve(polyClf, title=baseTitle + ' Poly degree ',
                              xTrain=xTrain, yTrain=yTrain)

    # --- RBF kernel ---
    rbfClf = createBaseSVC()
    rbfClf.set_params(C=1, gamma=1, kernel='rbf')
    plotter.plotValidationCurve(rbfClf, xTrain, yTrain, 'C',
                                [0.01, 0.05, 0.25, 0.5, 1],
                                graphTitle=baseTitle + ' RBF - C ')
    rbfClf.C = 0.5
    plotter.plotLearningCurve(rbfClf, title=baseTitle + ' RBF',
                              xTrain=xTrain, yTrain=yTrain)

    # Confusion matrix for the RBF model on the test split.
    rbfClf.fit(xTrain, yTrain)
    plotter.plotConfusion(rbfClf, 'Heart',
                          ['Diameter narrowing ', 'Diameter not narrowing'],
                          xTest, yTest)
Пример #13
0
def run():
    """Run the heart and adult experiments on baseline and reduced features.

    For each data set the experiment function is called five times, labelled
    'Baseline', 'PCA', 'ICA', 'RCA' and 'FAMD'. For the reduced variants both
    ``xTrain`` and ``xTest`` of the package are replaced by the output of the
    matching ``dimRedu.get*Data`` helper before the call. Heart uses
    iterations 599..698, adult uses 799..898.
    """

    def evaluateAll(dataType, label, iterations, evaluate):
        # One full pass (baseline plus the four reductions) for one data set.
        package = data.createData(dataType)
        evaluate(package, iterations, 'Baseline')

        # PCA is applied to the baseline package itself (no reload).
        package.xTrain = dimRedu.getPCAData(package.xTrain, label)
        package.xTest = dimRedu.getPCAData(package.xTest, label)
        evaluate(package, iterations, 'PCA')

        # ICA and RCA each start from a freshly created package.
        for tag in ('ICA', 'RCA'):
            package = data.createData(dataType)
            reducer = getattr(dimRedu, 'get{0}Data'.format(tag))
            package.xTrain = reducer(package.xTrain, label)
            package.xTest = reducer(package.xTest, label)
            evaluate(package, iterations, tag)

        # FAMD reads the Unprocessed_* attributes of the unprocessed package.
        package = unprocess(data.createData(dataType))
        package.xTrain = dimRedu.getFAMDData(package.Unprocessed_xTrain, label)
        package.xTest = dimRedu.getFAMDData(package.Unprocessed_xTest, label)
        evaluate(package, iterations, 'FAMD')

    evaluateAll('heart', 'Heart', range(599, 699), heart)
    evaluateAll('adult', 'Adult', range(799, 899), adult)
Пример #14
0
import warnings
warnings.filterwarnings("ignore")

import data

if __name__ == '__main__':
    # Script entry point. Each stage's module is imported immediately before
    # its run() is called, so the stages execute strictly in this order.

    import plotclusterscores
    plotclusterscores.run()
    import dimRedu
    datatypes = ['adult', 'heart']
    for d in datatypes:
        # Load each data set and hand it to dimRedu.run together with its name.
        package = data.createData(d)
        dimRedu.run(package, d)

    import neuralDim
    neuralDim.run()

    import cluster
    cluster.run()
Пример #15
0
    #build the dictionary of labels to do some compute
    labelsDict = {}
    for label in dataSet[:, -1]:
        if label not in labelsDict:
            labelsDict[label] = 0
        labelsDict[label] += 1

    print("labelDict:", labelsDict)
    #compute the entropy
    for key in labelsDict.keys():
        prob = float(labelsDict[key]) / numOfSamples
        entropy += prob * math.log2(prob)

    return entropy * -1


# Load the sample set and print its entropy; the second value returned by
# createData (cluts) is not used in the lines below.
samples, cluts = data.createData()
print("entropy:", getEntropy(samples))
# The string literal below is never assigned or executed: it holds a disabled
# splitDataSet helper together with its demo call.
'''
def splitDataSet(dataSet, axis, value):
    retDataSet = []
    for featVec in dataSet:
        if featVec[axis] == value:
            reducedFeatVec = featVec[:axis]     #chop out axis used for splitting
            reducedFeatVec.extend(featVec[axis+1:])
            retDataSet.append(reducedFeatVec)
    return retDataSet

print(splitDataSet(samples,0,1))
'''
Пример #16
0
            for coll in ln.collections:
                coll.remove()
            lineSet.clear()
        ln = plt.contour(X, Y, Z, [-0.5, 0, 0.5], colors=['r', 'black', 'b'])
        lineSet.append(ln)
        if label != None:
            plt.xlabel(label)
        # plt.show()
        plt.savefig("cache/data_mode" + str(self.config.mode) +
                    "_%.8d.png" % self.figCount)
        self.figCount += 1
        plt.pause(pause)


if __name__ == "__main__":
    # createData(4) returns three sequences, unpacked here as x, y and c.
    x, y, c = data.createData(4)

    # Copy the first 44 entries of each sequence aside ...
    xx = x[0:44]
    yy = y[0:44]
    cc = c[0:44]
    # ... and delete them from the originals in place, leaving the remainder.
    del x[0:44]
    del y[0:44]
    del c[0:44]
    # Remaining entries and the 44 set-aside entries are combined separately
    # (presumably a train / test split, judging by the name Data_test — confirm).
    Data = concate2D(x, y)
    Data_test = concate2D(xx, yy)
    # Module-level list; the plotting code earlier in this file clears and
    # appends to the name lineSet.
    lineSet = []

    # --- Experiment for 3.1 ---
    # config = netConfig()
    # config.set(inputD=3, outputD=1, layer=1, nodes=[1], lr=0.01, mode=0, batch=1,
    #            maxIter=100, active=noActive, activeDiff=noActiveDiff)
import json
import datetime
import dateutil.parser
import pandas as pd
import pytz
from data import createData

# importing sample data
data = createData()

# Empty frame indexed by 'time_of_day' with a single 'count' column; rows are
# added per "weekday hour" bucket in the loop below.
data_for_df = {'time_of_day': [], "count": []}
timeList = pd.DataFrame(data_for_df)
timeList.set_index('time_of_day', inplace=True)

# NOTE(review): datetime.now() is local wall-clock time, so tagging it as UTC
# is only exact when the machine itself runs in UTC — confirm.
today = pytz.UTC.localize(datetime.datetime.now())

# Cut-off: entries older than 90 days end the loop.
threeMonthsAgo = today - datetime.timedelta(days=90)

# NOTE(review): iteration starts at index 1 and data[1] is read before any
# length check — confirm that skipping index 0 is deliberate.
count = 1
curDate = dateutil.parser.parse(data[count]['start_date'])

while (curDate > threeMonthsAgo and count < len(data)):
    # Bucket key: abbreviated weekday plus two-digit hour, e.g. "Mon 14".
    dayStr = curDate.strftime("%a")  # getting day of week in 3 letters
    dayTime = curDate.strftime("%H")  # getting time of day
    dayStr = dayStr + " " + dayTime
    if dayStr in timeList.index.values:
        # NOTE(review): chained indexing on assignment; timeList.loc[dayStr, 'count']
        # is the form pandas documents for setting a single cell.
        timeList['count'][dayStr] = timeList['count'][dayStr] + 1
    else:
        # First occurrence of this bucket: add a one-row frame with count 1.
        tempDf = pd.DataFrame({'time_of_day': [dayStr], "count": [1]})
        tempDf.set_index('time_of_day', inplace=True)
        # NOTE(review): DataFrame.append was removed in pandas 2.0;
        # pd.concat([timeList, tempDf]) is the replacement.
        timeList = timeList.append(tempDf)
    # NOTE(review): nothing in the visible loop body advances count or curDate;
    # the remainder of the loop appears to be missing from this excerpt.
Пример #18
0
def reshapeMatrix(u,sigma,v_t,num):
    """Truncate an SVD factorisation to its leading ``num`` components.

    :param u: matrix u; its first ``num`` columns are kept
    :param sigma: sequence of singular values; the first ``num`` of them are
        placed on the diagonal of the new sigma matrix
    :param v_t: matrix v_t; its first ``num`` rows are kept
    :param num: the shape (``num`` x ``num``) of the new sigma matrix
    :return: tuple ``(u[:, :num], sigma_matrix, v_t[:num, :])``
    :raises IndexError: if ``sigma`` holds fewer than ``num`` values
    """
    sigma_matrix = np.zeros(shape=(num, num))
    if num > len(sigma):
        raise IndexError('sigma holds fewer than num values')
    # Write the leading singular values onto the diagonal in one step.
    diagonal = np.arange(num)
    sigma_matrix[diagonal, diagonal] = sigma[:num]
    left = u[:, :num]
    right = v_t[:num, :]
    return left, sigma_matrix, right

def EuclidSimilarity(vecA,vecB):
    """Return a similarity score derived from the Euclidean distance.

    The score is ``1 / (1 + ||vecA - vecB||)``: 1.0 for identical vectors and
    approaching 0 as the distance grows.

    :param vecA: first vector (must support ``vecA - vecB``)
    :param vecB: second vector
    :return: similarity as a float
    """
    distance = np.linalg.norm(vecA - vecB)
    return 1.0 / (1.0 + distance)


def predict():
    """Placeholder for the prediction step; not implemented yet.

    The definition previously had neither a colon nor a body, which made the
    whole module fail to parse. It now parses and fails loudly if called.

    :raises NotImplementedError: always
    """
    raise NotImplementedError("predict() has not been implemented yet")

# Load the data set and factor it with a singular value decomposition.
dataSet1=data.createData()
#print(dataSet1)
#print(dataSet1.shape)
U,sigma,V_T=np.linalg.svd(dataSet1)
# Disabled debug output, kept as a bare (unused) string literal.
'''
print("U:\n",U)
print(U.shape)
print(type(U))
print("sigma:\n",sigma)
print("V_T:\n",V_T)
print("\n\n\n\n\n")
'''

# Keep only the 3 leading singular values and the matching columns / rows.
U_matrix,sigma_matrix,V_T_matrix=reshapeMatrix(U,sigma,V_T,3)
'''
print("U:\n",U_matrix)