示例#1
0
def getLastMonthData():
    """Load the pipeline data and extract the ``fecha_dato == 15`` slice.

    Loads the training features, training targets and test set through
    ``pipeline.load_data()`` and hands the test set to
    ``pipeline.digitizeMatrix`` (its return value is ignored). Rows whose
    ``fecha_dato`` equals 15 are then selected from the features and the
    targets; missing values in the selected features are replaced by that
    slice's own column means.

    Returns:
        A 5-tuple ``(trainingFeatures, trainTarget, test, lastMonthData,
        lastMonthLabels)``.
    """
    features, targets, test_set = pipeline.load_data()
    pipeline.digitizeMatrix(test_set)
    print("Finding last month account statuses")

    # NOTE(review): 15 is presumably the encoded value of the most recent
    # month -- confirm against the pipeline's encoding.
    in_last_month = features.fecha_dato == 15
    month_rows = features[in_last_month]
    month_labels = targets[in_last_month]

    # Impute using means computed on the selected rows only.
    month_rows = month_rows.fillna(month_rows.mean())
    return (features, targets, test_set, month_rows, month_labels)
示例#2
0
def link_reg(sub):
    """Symlink a subject's BaleenHP registration file to ``register.dat``.

    Reads ``<sub>/info.txt`` through ``pipeline.load_data``. When the loaded
    mapping contains a ``"BaleenHP"`` entry, the working directory is changed
    to ``<sub>/BaleenHP`` and, if the source file exists and nothing is
    already present at the destination, ``register.dat`` is created there as
    a symlink to the ``register.dof6.dat`` found in the directory recorded
    under that entry.

    Args:
        sub: Subject directory path, interpolated as a ``%s`` prefix.

    Note:
        Changes the process working directory and does not restore it.
    """
    import os
    import pipeline as p

    d = p.load_data("%s/info.txt" % sub)
    # The key is the string "BaleenHP", matching the d["BaleenHP"] lookup below.
    if "BaleenHP" not in d:
        return

    os.chdir("%s/BaleenHP" % sub)
    # NOTE(review): ``Run1XXX`` is not defined in this block; it is presumably
    # a module-level name (or a string key that lost its quotes) -- confirm.
    src = "%s/register.dof6.dat" % d["BaleenHP"][Run1XXX]
    dst = "register.dat"
    print("%s -> %s" % (src, dst))
    if os.path.isfile(src) and not os.path.exists(dst):
        try:
            os.symlink(src, dst)
        except OSError:
            # Best effort: a link that cannot be created is skipped.
            pass
示例#3
0
def get_data(subjects):
    """Gather every subject's ``info.txt`` contents, keyed by field first.

    For each subject the file
    ``/<pre>/kuperberg/SemPrMM/MEG/data/<subject>/eve/info.txt`` is read with
    ``load_data`` and its items are re-indexed so that the result maps
    ``key -> {subject: value}``.

    Args:
        subjects: Iterable of subject identifiers.

    Returns:
        A dict of dicts ``{key: {subject: value}}``. Subjects for which an
        ``IOError`` is raised while loading are silently skipped.
    """
    by_key = {}
    for subject in subjects:
        eve_dir = '/%s/kuperberg/SemPrMM/MEG/data/%s/eve' % (pre, subject)
        info_path = op.join(eve_dir, 'info.txt')
        try:
            loaded = load_data(info_path)
            # Pivot from per-subject records to per-key records.
            for key, value in loaded.items():
                by_key.setdefault(key, {})[subject] = value
        except IOError:
            continue
    return by_key
示例#4
0
def get_data(subjects):
    """Build a ``{key: {subject: value}}`` table from per-subject info files.

    Each subject's
    ``/<pre>/kuperberg/SemPrMM/MEG/data/<subject>/eve/info.txt`` is loaded
    with ``load_data``; every ``(key, value)`` item it yields is stored under
    that key and subject.

    Args:
        subjects: Iterable of subject identifiers.

    Returns:
        The nested dict described above. A subject is skipped without any
        message when an ``IOError`` is raised while loading its file.
    """
    merged = {}
    dir_template = '/%s/kuperberg/SemPrMM/MEG/data/%s/eve'
    for sub_id in subjects:
        info_file = op.join(dir_template % (pre, sub_id), 'info.txt')
        try:
            for field, val in load_data(info_file).items():
                if field in merged:
                    merged[field][sub_id] = val
                else:
                    # First subject seen for this field.
                    merged[field] = {sub_id: val}
        except IOError:
            pass
    return merged
示例#5
0
def main():
    """Call ``load_data`` on ``../data/techcrunch.csv``; the result is unused."""
    load_data('../data/techcrunch.csv')
示例#6
0
def main():
    """Print each item returned by ``pp.load_data(FILE_NAME)`` on its own line."""
    for entry in pp.load_data(FILE_NAME):
        print("=> ", entry)
示例#7
0
def main():
    """Load ``../data/techcrunch.csv`` with ``load_data`` and print the result."""
    loaded = load_data("../data/techcrunch.csv")
    print("Result: ", loaded)
示例#8
0
文件: app.py 项目: Addi-Does/pipeline
def main():
    """Print what ``load_data`` returns for ``../data/techcrunch.csv``."""
    print("Result", load_data('../data/techcrunch.csv'))
import pipeline
import submitData
from sklearn.decomposition import PCA
from sklearn.ensemble import GradientBoostingClassifier

# Load the training features, training targets and test set, then hand the
# test set to pipeline.digitizeMatrix (its return value is not used).
# NOTE(review): the objects are presumably pandas DataFrames -- confirm.
trainingFeatures, trainTarget, test = pipeline.load_data()
pipeline.digitizeMatrix(test)

print("====================Cleaning Test Data====================")
# Drop two columns from the test set, then impute the remaining missing
# values with the test set's own column means.
del test['index']
del test['prev_fecha_dato']
test = test.fillna(test.mean())
print("====================DONE Cleaning Test Data====================")

print(
    "====================Filling Training Feature NA's with means...==================="
)
# fillna() returns a new object rather than modifying in place, so the
# result has to be rebound for the imputation to take effect.
trainingFeatures = trainingFeatures.fillna(trainingFeatures.mean())
print(
    "====================DONE Filling Training Feature NA's with means...==================="
)

print("===============Running PCA==================")
pca = PCA(n_components=8)
training = pca.fit_transform(trainingFeatures)
# Project the test set with the components fitted on the training data so
# both matrices live in the same 8-dimensional space; fitting a second time
# on the test set would produce an unrelated basis.
# NOTE(review): assumes `test` has the same columns as `trainingFeatures`
# once the two columns above are removed -- confirm.
testing = pca.transform(test)
print("===============DONE Runnning PCA============")

print("==============Classifiying================")
clf = GradientBoostingClassifier(n_estimators=70, verbose=True)
predictions = submitData.fitClassifier(train,
示例#10
0
def main():
    """Load ``../data/techcrunch.csv`` with ``load_data`` and print the result."""
    # Location of the input data file.
    source_path = '../data/techcrunch.csv'
    print("Resultat: ", load_data(source_path))