def getLastMonthData():
    """Load pipeline data and extract the final month's rows (fecha_dato == 15).

    Returns a 5-tuple:
        (trainingFeatures, trainTarget, test, lastMonthData, lastMonthLabels)
    where lastMonthData has its NaNs filled with per-column means.
    """
    trainingFeatures, trainTarget, test = pipeline.load_data()
    pipeline.digitizeMatrix(test)
    print("Finding last month account statuses")
    # Boolean mask selecting the most recent month in the training window.
    is_last_month = trainingFeatures.fecha_dato == 15
    lastMonthData = trainingFeatures[is_last_month]
    lastMonthLabels = trainTarget[is_last_month]
    # Mean-impute missing feature values, column-wise.
    lastMonthData = lastMonthData.fillna(lastMonthData.mean())
    return (trainingFeatures, trainTarget, test, lastMonthData, lastMonthLabels)
def link_reg(sub):
    """Create a register.dat symlink in <sub>/BaleenHP pointing at the Run1
    dof6 registration file recorded in <sub>/info.txt.

    Parameters
    ----------
    sub : str
        Subject directory containing info.txt and a BaleenHP/ subdirectory.

    Side effects: changes the working directory to <sub>/BaleenHP and may
    create a symlink there. Does nothing if info.txt lacks a BaleenHP entry.
    """
    import os
    import pipeline as p

    d = p.load_data("%s/info.txt" % sub)
    # NOTE(review): the original compared against the bare (undefined) names
    # BaleenHP / Run1XXX, which would raise NameError at runtime; keys loaded
    # from info.txt are presumably strings — confirm against load_data's output.
    if "BaleenHP" in d:
        os.chdir("%s/BaleenHP" % sub)
        src = "%s/register.dof6.dat" % d["BaleenHP"]["Run1XXX"]
        dst = "register.dat"
        print("%s -> %s" % (src, dst))  # fixed: missing comma in the % tuple
        if os.path.isfile(src) and not os.path.exists(dst):
            try:
                os.symlink(src, dst)  # fixed: missing comma between arguments
            except OSError:
                # Best-effort: link may already exist, or the filesystem may
                # not support symlinks. Narrowed from a bare `except:`.
                pass
def get_data(subjects):
    """Collect info.txt fields for each subject, pivoted as {field: {subject: value}}.

    Subjects whose info.txt cannot be read are silently skipped (best-effort).
    """
    all_data = {}
    for sub in subjects:
        info_path = op.join(
            '/%s/kuperberg/SemPrMM/MEG/data/%s/eve' % (pre, sub), 'info.txt')
        try:
            parsed = load_data(info_path)
        except IOError:
            continue  # unreadable subject: skip it
        # Reorient: per-subject dicts become per-field dicts keyed by subject.
        for field, value in parsed.items():
            all_data.setdefault(field, {})[sub] = value
    return all_data
def get_data(subjects):
    """Gather each subject's info.txt entries into {field: {subject: value}}.

    Unreadable info.txt files are ignored so one missing subject does not
    abort the whole collection.
    """
    collected = {}
    path_template = '/%s/kuperberg/SemPrMM/MEG/data/%s/eve'
    for subject in subjects:
        info_file = op.join(path_template % (pre, subject), 'info.txt')
        try:
            entries = load_data(info_file)
        except IOError:
            pass
        else:
            # Pivot the per-subject dict into the field-major layout.
            for key, val in entries.items():
                bucket = collected.get(key)
                if bucket is None:
                    bucket = collected[key] = {}
                bucket[subject] = val
    return collected
def main():
    """Load the TechCrunch CSV; the parsed result is currently unused."""
    csv_path = '../data/techcrunch.csv'
    result = load_data(csv_path)
def main():
    """Load the records via pp.load_data and print each one on its own line."""
    records = pp.load_data(FILE_NAME)
    for entry in records:
        print("=> ", entry)
def main():
    """Load the TechCrunch CSV and echo the parsed result."""
    source = "../data/techcrunch.csv"
    parsed = load_data(source)
    print("Result: ", parsed)
def main():
    """Load the TechCrunch CSV and print the outcome."""
    csv_file = '../data/techcrunch.csv'
    outcome = load_data(csv_file)
    print("Result", outcome)
import pipeline import submitData from sklearn.decomposition import PCA from sklearn.ensemble import GradientBoostingClassifier trainingFeatures, trainTarget, test = pipeline.load_data() pipeline.digitizeMatrix(test) print("====================Cleaning Test Data====================") del test['index'] del test['prev_fecha_dato'] test = test.fillna(test.mean()) print("====================DONE Cleaning Test Data====================") print( "====================Filling Training Feature NA's with means...===================" ) trainingFeatures.fillna(trainingFeatures.mean()) print( "====================DONE Filling Training Feature NA's with means...===================" ) print("===============Running PCA==================") pca = PCA(n_components=8) training = pca.fit_transform(trainingFeatures) testing = pca.fit_transform(test) print("===============DONE Runnning PCA============") print("==============Classifiying================") clf = GradientBoostingClassifier(n_estimators=70, verbose=True) predictions = submitData.fitClassifier(train,
def main():
    """Load the TechCrunch CSV and print the result (German-labelled output)."""
    data_file = '../data/techcrunch.csv'  # location of the input data
    resultat = load_data(data_file)
    print("Resultat: ", resultat)