def _is_input_column(col, target):
    """Return True when ``col`` should be passed to FFX as an input.

    The target column, the column named ``"T"`` and every column longer than
    two characters whose name starts with ``"D_"`` are excluded.
    """
    if col == target or col == "T":
        return False
    return not (len(col) > 2 and col[:2] == "D_")


def _run_problem_set(kind, problems):
    """Fit FFX and print the models for each ``"name;target"`` entry."""
    for problem in problems:
        prob, target = problem.split(";")
        df = DATA.read_datafile(kind, prob)
        cols = [col for col in df.columns if _is_input_column(col, target)]
        # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the
        # same ndarray and matches how the target column is read below.
        ins = df[cols].values
        outs = df[target].values
        print("\n\n", prob, target, ins.shape, outs.shape,
              "\n=======================\n")
        # The same data is passed as both the training and the test set.
        models = ffx.run(ins, outs, ins, outs, cols)
        for model in models:
            print_model(model.complexity(), model, ins, outs)


def run_explicit_loop():
    """Run FFX on every explicit problem, then on every diffeq problem.

    Problems are taken from ``DATA.explicit_problems`` and
    ``DATA.diffeq_problems``; each entry has the form ``"name;target"``.
    For every problem a header line and each resulting model are printed.
    """
    _run_problem_set("explicit", DATA.explicit_problems)
    _run_problem_set("diffeq", DATA.diffeq_problems)
def testMultiFFXModelFactory(self):
    """Check the mean test NMSE of ``ffx.run`` for ndarray and DataFrame input."""
    expected_mean_nmse = 0.4391323

    # numpy.ndarray inputs, variable names passed explicitly
    ndarray_models = ffx.run(
        self.xtrain, self.ytrain, self.xtest, self.ytest, self.data.columns
    )
    ndarray_nmse = [m.test_nmse for m in ndarray_models]
    assert abs(np.mean(ndarray_nmse) - expected_mean_nmse) < self.EPS

    # pandas.DataFrame inputs, no variable names passed
    frame_models = ffx.run(
        self.xtrain_pandas, self.ytrain, self.xtest_pandas, self.ytest
    )
    frame_nmse = [m.test_nmse for m in frame_models]
    assert abs(np.mean(frame_nmse) - expected_mean_nmse) < self.EPS
def test_readme_example():
    """Run the README example and compare the model summaries with EXPECTED."""
    # Two predictors with three samples each; .T puts samples on the rows.
    train_X = np.array([(1.5, 2, 3), (4, 5, 6)]).T
    train_y = np.array([1, 2, 3])
    test_X = np.array([(5.241, 1.23, 3.125), (1.1, 0.124, 0.391)]).T
    test_y = np.array([3.03, 0.9113, 1.823])

    np.random.seed(0)
    models = ffx.run(train_X, train_y, test_X, test_y, ["a", "b"])

    summaries = []
    for model in models:
        summaries.append((model.numBases(), model.complexity(), str(model)))
    assert summaries == EXPECTED
def test_multi_ffx_model_factory(iris):
    """Check the mean test NMSE of ``ffx.run`` for ndarray and DataFrame input.

    ``iris`` is indexed with ``.iloc`` and ``.columns``: the first two columns
    are the inputs and the third column is the target.
    """
    np.random.seed(0)
    expected_mean_nmse = 0.5821326214099275

    train_rows = slice(None, 50)
    test_rows = slice(51, 100)

    xtrain_pandas = iris.iloc[train_rows, 0:2]
    xtest_pandas = iris.iloc[test_rows, 0:2]
    xtrain = xtrain_pandas.values
    ytrain = iris.iloc[train_rows, 2]
    xtest = xtest_pandas.values
    ytest = iris.iloc[test_rows, 2]

    # numpy.ndarray inputs, variable names passed explicitly
    ndarray_models = ffx.run(xtrain, ytrain, xtest, ytest, iris.columns)
    ndarray_mean = np.mean([m.test_nmse for m in ndarray_models])
    assert abs(ndarray_mean - expected_mean_nmse) < EPS

    # pandas.DataFrame inputs, no variable names passed
    frame_models = ffx.run(xtrain_pandas, ytrain, xtest_pandas, ytest)
    frame_mean = np.mean([m.test_nmse for m in frame_models])
    assert abs(frame_mean - expected_mean_nmse) < EPS
def test_x_square():
    """Fit FFX on samples of y = x^2 and compare the summaries with EXPECTED."""
    np.random.seed(0)

    # Single-predictor dataset: x = 0..3 for training, x = 4..7 for testing.
    train_X = np.array([[0, 1, 2, 3]]).T
    train_y = np.array([0, 1, 4, 9])
    test_X = np.array([[4, 5, 6, 7]]).T
    test_y = np.array([16, 25, 36, 49])

    models = ffx.run(train_X, train_y, test_X, test_y, ["x"])

    summaries = []
    for model in models:
        summaries.append((model.numBases(), model.complexity(), str(model)))
    assert summaries == EXPECTED
def _is_input_column(col, target):
    """Return True when ``col`` should be passed to FFX as an input.

    The target column, the column named ``"T"`` and every column longer than
    two characters whose name starts with ``"D_"`` are excluded.
    """
    if col == target or col == "T":
        return False
    return not (len(col) > 2 and col[:2] == "D_")


def _run_problem_set(kind, problems):
    """Fit FFX and print the models for each ``"name;target"`` entry."""
    for problem in problems:
        prob, target = problem.split(";")
        df = DATA.read_datafile(kind, prob)
        cols = [col for col in df.columns if _is_input_column(col, target)]
        # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the
        # same ndarray and matches how the target column is read below.
        ins = df[cols].values
        outs = df[target].values
        print("\n\n", prob, target, ins.shape, outs.shape,
              "\n=======================\n")
        # The same data is passed as both the training and the test set.
        models = ffx.run(ins, outs, ins, outs, cols)
        for model in models:
            print_model(model.complexity(), model, ins, outs)


def run_explicit_loop():
    """Run FFX on every explicit problem, then on every diffeq problem.

    Problems are taken from ``DATA.explicit_problems`` and
    ``DATA.diffeq_problems``; each entry has the form ``"name;target"``.
    For every problem a header line and each resulting model are printed.
    """
    _run_problem_set("explicit", DATA.explicit_problems)
    _run_problem_set("diffeq", DATA.diffeq_problems)
def fit(self, chips):
    """Fit an FFX model.

    Parameters
    ----------
    chips : list
        A list of chip model objects. Each chip contributes one data row
        made of ``chip.LCT.values()`` followed by ``chip.gnd``.

    Notes
    -----
    Stores the models returned by ``ffx.run`` in ``self.models`` and the
    index (into ``self.models``) of the model with the lowest ``test_nmse``
    in ``self.best_model``.
    """
    # list() keeps the concatenation valid when values() returns a view
    # object rather than a list.
    data = [list(chip.LCT.values()) + [chip.gnd] for chip in chips]
    xtrain, ytrain, xtest, ytest = partition(data)
    # Train on the partition computed from `chips` above.
    # NOTE(review): variable names still come from self.data.columns, which
    # this method does not set -- confirm it matches the columns of `data`.
    self.models = ffx.run(xtrain, ytrain, xtest, ytest, self.data.columns)
    self.best_model = np.argmin([model.test_nmse for model in self.models])
def learn_ffx(predicate='p', size=None):
    """Fit FFX models for one relation and print them.

    Rows are read from the ``data.csv~`` file located next to this module and
    every field is converted with ``np.float64``.  The first 26 values of a
    row are the inputs; the target is the value at column
    ``26 + TemporalRelation.all_relations.index(predicate)``.  The first
    ``size // 2`` rows form the training set, the remaining rows the test set.

    :param predicate: relation name, looked up in
        ``TemporalRelation.all_relations``.
    :param size: maximum number of rows to read.  ``None`` or a negative
        value reads the whole file and splits it in half.
    """
    n_inputs = 26
    target_index = n_inputs + TemporalRelation.all_relations.index(predicate)
    data_path = os.path.dirname(os.path.abspath(__file__)) + '/data.csv~'

    rows = []
    # The context manager closes the file even when a field fails to parse.
    with open(data_path) as data_file:
        for row in csv.reader(data_file):
            if size is not None and len(rows) == size:
                break
            rows.append([np.float64(element) for element in row])

    # Without an explicit size the split point is derived from the number of
    # rows actually read, so the training set is not left empty.
    if size is None or size < 0:
        size = len(rows)
    split = size // 2
    train_rows, test_rows = rows[:split], rows[split:]

    # Targets are extracted first so that a row that is too short raises
    # IndexError instead of producing a ragged input matrix.
    train_y = np.array([row[target_index] for row in train_rows])
    test_y = np.array([row[target_index] for row in test_rows])
    train_x = np.array(
        [row[:n_inputs] for row in train_rows]).reshape(-1, n_inputs)
    test_x = np.array(
        [row[:n_inputs] for row in test_rows]).reshape(-1, n_inputs)

    models = ffx.run(train_x, train_y, test_x, test_y,
                     list(TemporalRelation.all_relations))
    for model in models:
        print(model)
def learn_ffx(predicate='p', size=None):
    """Fit FFX models for one relation and print them.

    Rows are read from the ``data.csv~`` file located next to this module and
    every field is converted with ``np.float64``.  The first 26 values of a
    row are the inputs; the target is the value at column
    ``26 + TemporalRelation.all_relations.index(predicate)``.  The first
    ``size // 2`` rows form the training set, the remaining rows the test set.

    :param predicate: relation name, looked up in
        ``TemporalRelation.all_relations``.
    :param size: maximum number of rows to read.  ``None`` or a negative
        value reads the whole file and splits it in half.
    """
    n_inputs = 26
    target_index = n_inputs + TemporalRelation.all_relations.index(predicate)
    data_path = os.path.dirname(os.path.abspath(__file__)) + '/data.csv~'

    rows = []
    # The context manager closes the file even when a field fails to parse.
    with open(data_path) as data_file:
        for row in csv.reader(data_file):
            if size is not None and len(rows) == size:
                break
            rows.append([np.float64(element) for element in row])

    # Without an explicit size the split point is derived from the number of
    # rows actually read, so the training set is not left empty.
    if size is None or size < 0:
        size = len(rows)
    split = size // 2
    train_rows, test_rows = rows[:split], rows[split:]

    # Targets are extracted first so that a row that is too short raises
    # IndexError instead of producing a ragged input matrix.
    train_y = np.array([row[target_index] for row in train_rows])
    test_y = np.array([row[target_index] for row in test_rows])
    train_x = np.array(
        [row[:n_inputs] for row in train_rows]).reshape(-1, n_inputs)
    test_x = np.array(
        [row[:n_inputs] for row in test_rows]).reshape(-1, n_inputs)

    models = ffx.run(train_x, train_y, test_x, test_y,
                     list(TemporalRelation.all_relations))
    for model in models:
        print(model)
# Rename the dataset columns to X1..Xn for the inputs and 'y' for the last
# column, e.g. for five inputs:
# test.columns = ['X1','X2','X3','X4','X5','y']
XColsSize = test.shape[1] - 1
XColsName = ['X{}'.format(x + 1) for x in range(0, XColsSize)]
# Copy the input names before 'y' is appended; FFX only gets the inputs.
FFXColsName = np.copy(XColsName)
XColsName.append('y')
test.columns = XColsName
X = test.iloc[:, :-1]
y = test.iloc[:, -1]

# create training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

## FFX
import ffx

models = ffx.run(X_train, y_train, X_test, y_test, FFXColsName)
# DataFrame.as_matrix() was removed in pandas 1.0; .values yields the same
# ndarray on old and new pandas versions.
X_test_matrix = X_test.values
for model in models:
    y_pred = model.simulate(X_test_matrix)
    print(r2_score(y_test, y_pred))
    print(model)
# Tiny example from the README; for each model it additionally prints
# numBases and complexity.
import numpy as np
import ffx

# Two predictors ("a" and "b") with three samples each; .T puts the samples
# on the rows.
train_X = np.array([(1.5, 2, 3), (4, 5, 6)]).T
train_y = np.array([1, 2, 3])
test_X = np.array([(5.241, 1.23, 3.125), (1.1, 0.124, 0.391)]).T
test_y = np.array([3.03, 0.9113, 1.823])

models = ffx.run(train_X, train_y, test_X, test_y, ["a", "b"])

print("numBases: GP-complexity : model")
for model in models:
    yhat = model.simulate(test_X)
    print(model.numBases(), ":", model.complexity(), ": ", model)
#!/usr/bin/env python
import numpy as np
import ffx

# Single-predictor dataset sampled from y = x^2:
# x = 0..3 for training, x = 4..7 for testing.
train_X = np.array([[0, 1, 2, 3]]).T
train_y = np.array([0, 1, 4, 9])
test_X = np.array([[4, 5, 6, 7]]).T
test_y = np.array([16, 25, 36, 49])

models = ffx.run(train_X, train_y, test_X, test_y, ["x"])

print('True model: y = x^2')
print('Results:')
print('Num bases,Test error (%),Model\n')
for model in models:
    # One row per model: number of bases, test NMSE as a percentage, model.
    print('%10s, %13s, %s\n' % (
        '%d' % model.numBases(),
        '%.4f' % (model.test_nmse * 100.0),
        model,
    ))
#!/usr/bin/env python
import numpy as np
import ffx

# Single-predictor dataset sampled from y = x^2:
# x = 0..3 for training, x = 4..7 for testing.
train_X = np.array([[0, 1, 2, 3]]).T
train_y = np.array([0, 1, 4, 9])
test_X = np.array([[4, 5, 6, 7]]).T
test_y = np.array([16, 25, 36, 49])

# Hyper-parameter overrides handed to ffx.run through its `hyper` keyword.
hyper = {"_l1_ratio": 1}
models = ffx.run(train_X, train_y, test_X, test_y, ["x"], hyper=hyper)

print("True model: y = x^2")
print("Results:")
print("Num bases,Test error (%),Model\n")
for model in models:
    # One row per model: number of bases, test NMSE as a percentage, model.
    print("%10s, %13s, %s\n" % (
        "%d" % model.numBases(),
        "%.4f" % (model.test_nmse * 100.0),
        model,
    ))
def _slice_columns(columns, first, last):
    """Copy samples ``first..last`` (inclusive) of every column into float arrays."""
    return {
        key: np.array([series[i] for i in range(first, last + 1)], dtype=float)
        for key, series in columns.items()
    }


def _design_matrix(part, feature_keys):
    """Stack the ``feature_keys`` columns of ``part`` into a (samples, features) array."""
    matrix = np.zeros(shape=(len(part['ElNino_tau']), len(feature_keys)))
    for col, key in enumerate(feature_keys):
        matrix[:, col] = part[key]
    return matrix


def FFX(dic, p_total=100, p_train=70, p_test=30, pop=()):
    """Fit FFX on a leading range of the samples and predict a trailing range.

    Only the first ``p_total`` percent of the samples are considered.  Inside
    that window the leading ``p_train`` percent is used for training and the
    trailing ``p_test`` percent for testing; samples between the two ranges
    are left unused.

    :param dic: mapping from column name to an indexable sequence of samples.
        The length of the first column is used for all of them.  Must contain
        ``'ElNino_tau'`` (the regression target) and ``'t0'`` (returned as the
        time axis, never used as a model input).
    :param p_total: percentage of the samples to consider.
    :param p_train: percentage of the considered window used for training.
    :param p_test: percentage of the considered window used for testing.
    :param pop: names of columns to leave out; unknown names are ignored.
    :return: tuple ``(time, y_test, prediction)``: the ``t0`` and
        ``ElNino_tau`` values of the test range, and the output of the last
        model returned by ``ffx.run`` on the test inputs with negative values
        replaced by ``0.0``.
    """
    assert p_train + p_test <= 100

    # Inclusive index bounds of the train and test ranges.
    length = len(next(iter(dic.values())))
    window = length * float(p_total) / 100.0
    init_train = 0
    fin_train = int(window * float(p_train) / 100.0)
    fin_void = int(window * float(100.0 - p_test) / 100.0)
    init_test = fin_void + 1
    fin_test = int(window) - 1

    # Drop the excluded features without touching the caller's mapping.
    new_dic = dic.copy()
    for k in pop:
        new_dic.pop(k, None)

    dic_train = _slice_columns(new_dic, init_train, fin_train)
    dic_test = _slice_columns(new_dic, init_test, fin_test)

    keys = sorted(new_dic.keys())
    print(keys)
    # Neither the target nor the time axis is a model input.
    keys.remove('ElNino_tau')
    keys.remove('t0')

    y_train = dic_train['ElNino_tau']
    x_train = _design_matrix(dic_train, keys)
    y_test = dic_test['ElNino_tau']
    x_test = _design_matrix(dic_test, keys)

    ffx.core.CONSIDER_THRESH = True
    # NOTE(review): the name list carries 'ElNino_tau' as a trailing extra
    # entry, exactly as the original call did -- confirm ffx.run needs it.
    models_ffx = ffx.run(x_train, y_train, x_test, y_test,
                         keys + ['ElNino_tau'])

    # Use the last model returned and replace negative predictions by 0.0.
    model = models_ffx[-1]
    prediction = np.asarray(model.simulate(x_test), dtype=float)
    new_pred_FFX = np.where(prediction >= 0, prediction, 0.0)

    time = dic_test['t0'].copy()
    return time, y_test, new_pred_FFX
print('Test data starting year: {}'.format(test_starting_year))

# Prepare FFX inputs: the first train_size rows are used for training and
# the remaining rows for testing.
cur_train_X, cur_train_Y = cur_data_X[:train_size], cur_data_Y[:train_size]
cur_test_X, cur_test_Y = cur_data_X[train_size:], cur_data_Y[train_size:]
assert cur_test_X.shape[0] == cur_test_Y.shape[0]
assert cur_train_X.shape[0] == cur_train_Y.shape[0]
print('cur_train_X dim: {}'.format(cur_train_X.shape))
print('cur_test_X dim: {}'.format(cur_test_X.shape))

#models = xgp.XGPRegressor()
#models.fit(train_X, train_Y)
models = ffx.run(
    cur_train_X, cur_train_Y, cur_test_X, cur_test_Y, predictors)

# Keep the model with the smallest summed squared error on the test rows.
best_performing_model = {'sq_err': float('inf'), 'model': None}
for model in models:
    #yhat = model.predict(test_X)
    #y = np.reshape(test_Y, test_Y.shape[0])
    yhat = model.simulate(cur_test_X)
    y = np.reshape(cur_test_Y, cur_test_Y.shape[0])
    print(' * {}'.format(model))
    sq_err = np.sum(np.square(y - yhat))
    print(' squared error= {}'.format(sq_err))
    if sq_err < best_performing_model['sq_err']:
        best_performing_model.update(
            model=model,
            sq_err=sq_err,
            y=y,
            yhat=yhat,
            rms_err=np.sqrt(sq_err / len(yhat)),
        )
if __name__ == '__main__':
    # dir = '/home/oscar/Documents/AptanaStudio3Workspace/ffxREMOTERUN/RTD.csv'
    # d = InputData(dir, dir)
    # The first command-line argument is passed as both InputData arguments.
    data_path = str(sys.argv[1])
    d = InputData(data_path, data_path)
    train_y, test_y, ratio = d.scaleY(5)

    isploted = False
    if isploted:
        # Plot the training data together with a fixed rational function of V.
        plt.plot(d.train_x.T[0], d.train_y)

        def f(V):
            return 1 / ratio * (
                0.0199 - 2.29 * V**2 + 1.46 * V - 0.137 * V**2 + 0.124 * V) / (
                    1.0 + 3.67 * V**2 - 3.64 * V - 0.261 * V + 0.224 * V**2)

        y = [f(V) for V in d.train_x.T[0]]
        plt.plot(d.train_x.T[0], y, 'r')
        plt.show()
    else:
        models = ffx.run(d.train_x, train_y, d.test_x, test_y, d.names)
        # print('Results:')
        # Header row followed by one formatted row per model.
        disList = ['Num bases, Test error (%), Model\n']
        for model in models:
            # print(model)
            num_bases = '%d' % model.numBases()
            test_error = '%.4f' % (model.test_nmse * 100.0)
            disList.append(
                '%10s, %13s, %25s\n' % (num_bases, test_error, model))
        print(json.dumps(disList))
        # with open("/home/oscar/Documents/AptanaStudio3Workspace/ffxREMOTERUN/output.txt", "a") as myfile:
        #     myfile.write("appended text")
        # print(json.dumps(["IHateit", "NoIloveit"])) # use for debug ssh connection
        ## This has to be right after print(json.dumps())
        raise SystemExit("End")
# Tiny example from the README; for each model it additionally prints
# numBases and complexity.
import numpy as np
import ffx

# Two predictors ("a" and "b") with three samples each; .T puts the samples
# on the rows.
train_X = np.array([(1.5, 2, 3), (4, 5, 6)]).T
train_y = np.array([1, 2, 3])
test_X = np.array([(5.241, 1.23, 3.125), (1.1, 0.124, 0.391)]).T
test_y = np.array([3.03, 0.9113, 1.823])

models = ffx.run(train_X, train_y, test_X, test_y, ["a", "b"])

print("numBases: GP-complexity : model")
for model in models:
    yhat = model.simulate(test_X)
    print(model.numBases(), ":", model.complexity(), ": ", model)
x2 = pd.DataFrame(x2)
train_data = pd.concat([x1, x2], axis=1)


def _polynomial_target(frame):
    """Evaluate 2.5*a^4 - 1.3*a^3 + 0.5*b^2 - 1.7*a on the first two columns."""
    return 2.5 * np.power(frame.iloc[:, 0], 4) - 1.3 * np.power(
        frame.iloc[:, 0], 3) + 0.5 * np.power(frame.iloc[:, 1],
                                              2) - 1.7 * frame.iloc[:, 0]


train_y = _polynomial_target(train_data)

# Test inputs: every pair of points on a 31 x 31 grid that starts at -0.15
# and advances in steps of 0.2 along both axes.
grid_axis = [-0.15 + 0.2 * step for step in np.arange(31)]
xx1 = []
xx2 = []
for first in grid_axis:
    for second in grid_axis:
        xx1.append(first)
        xx2.append(second)
xx1 = pd.DataFrame(xx1)
xx2 = pd.DataFrame(xx2)
test_data = pd.concat([xx1, xx2], axis=1)
test_y = _polynomial_target(test_data)

train_X = train_data
test_X = test_data

# Function-style API: print every model returned by ffx.run.
models = ffx.run(train_X, train_y, test_X, test_y,
                 ["predictor_a", "predictor_b"])
for model in models:
    yhat = model.simulate(test_X)
    print(model)

# Estimator-style API: fit, predict and score with FFXRegressor.
FFX = ffx.FFXRegressor()
FFX.fit(train_X, train_y)
print("Prediction:", FFX.predict(test_X))
print("Score:", FFX.score(test_X, test_y))