def __init__(self, n_estimators, learning_rate, min_samples_split,
             min_impurity, max_depth, regression, debug):
    """Configure the boosting ensemble and pre-build its base learners.

    Stores every hyperparameter on the instance, picks the loss by task
    (square loss for regression, log loss for classification) and creates
    one RegressionTree per boosting stage.
    """
    self.n_estimators = n_estimators            # number of boosting stages
    self.learning_rate = learning_rate          # shrinkage per stage
    self.min_samples_split = min_samples_split  # min samples to justify a split
    self.min_impurity = min_impurity            # min impurity reduction to split
    self.max_depth = max_depth                  # depth cap per tree
    self.init_estimate = None                   # initial prediction of y
    self.regression = regression
    self.debug = debug
    self.multipliers = []

    # Square loss for regression, log loss for classification.
    if self.regression:
        self.loss = SquareLoss()
    else:
        self.loss = LogisticLoss()

    # One regression tree per boosting stage.
    self.trees = [
        RegressionTree(min_samples_split=self.min_samples_split,
                       min_impurity=self.min_impurity,
                       max_depth=self.max_depth)
        for _ in range(self.n_estimators)
    ]
def __init__(self, n_estimators, learning_rate, min_samples_split,
             min_impurity, max_depth, regression, debug):
    """Initialize the gradient boosting model.

    Records all hyperparameters, selects the task-appropriate loss
    (SquareLoss for regression, LogisticLoss otherwise) and allocates
    the stage trees up front.
    """
    self.n_estimators = n_estimators            # Number of trees
    self.learning_rate = learning_rate          # Step size for gradient updates
    self.min_samples_split = min_samples_split  # The minimum n of samples to justify split
    self.min_impurity = min_impurity            # Minimum variance reduction to continue
    self.max_depth = max_depth                  # Maximum depth for tree
    self.init_estimate = None                   # The initial prediction of y
    self.regression = regression
    self.debug = debug

    # Square loss for regression
    # Log loss for classification
    self.loss = SquareLoss()
    if not self.regression:
        self.loss = LogisticLoss()

    # Initialize regression trees, one per boosting iteration.
    self.trees = []
    for _stage in range(self.n_estimators):
        stage_tree = RegressionTree(
            min_samples_split=self.min_samples_split,
            min_impurity=self.min_impurity,
            max_depth=self.max_depth,
        )
        self.trees.append(stage_tree)
def __init__(self,
             n_estimators,          # int, number of trees
             learning_rate,         # learning rate for the gradient step
             min_samples_split,     # minimum number of samples in a node to split
             min_impurity,          # minimum impurity required to keep splitting
             max_depth,             # maximum depth of each subtree
             regression=True        # bool, whether this is a regression problem
             ):
    """Initialize the gradient boosting ensemble.

    Chooses SquareLoss for regression and SoftmaxLoss for classification,
    and builds one RegressionTree per boosting stage.
    """
    self.n_estimators = n_estimators
    self.learning_rate = learning_rate
    self.min_samples_split = min_samples_split
    self.min_impurity = min_impurity
    self.max_depth = max_depth
    self.regression = regression
    # Progress bar shown during fitting.
    self.bar = progressbar.ProgressBar(widgets=bar_widgets)

    if regression:
        self.loss = SquareLoss()
    else:
        self.loss = SoftmaxLoss()

    self.trees = []
    for _ in range(self.n_estimators):
        tree = RegressionTree(min_samples_split=self.min_samples_split,
                              min_impurity=self.min_impurity,
                              max_depth=self.max_depth)
        # BUG FIX: the original loop created each tree but never stored it,
        # leaving self.trees empty so the ensemble had no base learners.
        self.trees.append(tree)
def __init__(self, n_estimators, learning_rate, max_depth=2,
             min_split_samples=2, min_impurity=1e-7, regression=False):
    """Set up the boosting model.

    Uses SquareLoss when solving a regression task and CrossEntropy for
    classification, then pre-allocates one RegressionTree per stage.
    """
    self.n_estimators = n_estimators    # number of boosting stages
    self.learning_rate = learning_rate  # shrinkage applied to each stage
    self.regression = regression

    # Loss depends on the task type.
    self.loss = SquareLoss() if self.regression else CrossEntropy()

    # Base learners: one shallow regression tree per stage.
    self.trees = [
        RegressionTree(max_depth=max_depth,
                       min_split_samples=min_split_samples,
                       min_impurity=min_impurity)
        for _ in range(n_estimators)
    ]
def __init__(self, n_estimators=20, learning_rate=0.5,
             min_samples_split=20, min_var_red=1e-4, max_depth=4):
    """Initialize the boosting regressor and its stage trees."""
    self.n_estimators = n_estimators            # Number of trees
    self.learning_rate = learning_rate          # shrinkage per boosting stage
    self.min_samples_split = min_samples_split  # min samples needed to split
    self.min_var_red = min_var_red              # Minimum variance reduction to continue
    self.max_depth = max_depth                  # Maximum depth for tree
    self.init_estimate = None                   # initial prediction, set at fit time

    # Initialize regression trees — one base learner per stage.
    self.trees = []
    for _stage in range(self.n_estimators):
        base_tree = RegressionTree(
            min_samples_split=self.min_samples_split,
            min_impurity=self.min_var_red,
            max_depth=self.max_depth,
        )
        self.trees.append(base_tree)
def __init__(self, n_estimators, learning_rate, min_samples_split,
             min_impurity, max_depth, regression):
    """Configure the gradient boosting model.

    Square loss is used for regression and cross-entropy for
    classification; one RegressionTree is created per boosting stage.
    """
    self.n_estimators = n_estimators            # stage count
    self.learning_rate = learning_rate          # per-stage shrinkage
    self.min_samples_split = min_samples_split  # split threshold on sample count
    self.min_impurity = min_impurity            # split threshold on impurity gain
    self.max_depth = max_depth                  # per-tree depth cap
    self.regression = regression
    # Progress reporting during training.
    self.bar = progressbar.ProgressBar(widgets=bar_widgets)

    # Square loss for regression
    # Log loss for classification
    if self.regression:
        self.loss = SquareLoss()
    else:
        self.loss = CrossEntropy()

    # Initialize regression trees
    self.trees = [
        RegressionTree(min_samples_split=self.min_samples_split,
                       min_impurity=self.min_impurity,
                       max_depth=self.max_depth)
        for _ in range(self.n_estimators)
    ]
def main():
    """Train a RegressionTree on the 2016 Linkoping temperature data and
    plot train/test points against the tree's predictions."""
    print("-- Regression Tree --")

    # Load temperature data
    data = pd.read_csv('../datasets/TempLinkoping2016.txt', sep="\t")

    # BUG FIX: Series.as_matrix() was deprecated in pandas 0.23 and removed
    # in 1.0; .values returns the same ndarray on every pandas version.
    time = np.atleast_2d(data["time"].values).T
    temp = np.atleast_2d(data["temp"].values).T

    X = standardize(time)  # Time. Fraction of the year [0, 1]
    y = temp[:, 0]         # Temperature. Reduce to one-dim

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    model = RegressionTree()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)

    # Plot the results (x is rescaled from fraction-of-year to day number)
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    m3 = plt.scatter(366 * X_test, y_pred, color='black', s=10)
    plt.suptitle("Regression Tree")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"),
               loc='lower right')
    plt.show()
from decision_tree import RegressionTree

import pickle
import sys

import numpy as np

# Get data from train dataset. With dtype=None, genfromtxt infers a type per
# column and returns a 1-D structured array (one record per row).
my_data = np.genfromtxt('./datasets/Train_Datasets.csv', delimiter=',',
                        skip_header=1, dtype=None, encoding=None)

# BUG FIX: the original built X/y in a vstack loop (seeded with spurious
# zero rows) and then discarded both with `X, y = my_data[:, :-1],
# my_data[:, -1]` — a 2-D slice that raises IndexError on a 1-D structured
# array. Build feature and target arrays directly from each record instead.
X = np.array([list(row)[:-1] for row in my_data], dtype=object)
y = np.array([list(row)[-1] for row in my_data])

models = RegressionTree()
models.fit(X, y)

# Persist the fitted model, then reload it to verify the round trip.
# Context managers ensure the file handles are closed.
filename = 'models_version_one.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(models, model_file)
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Test predict for some input; fall back to a sample row when too few
# command-line arguments are given.
args = sys.argv[1:]
if len(args) < 4:
    args = ['FDT07', 5.82, 'reg', 0, 'Fruits and Vegetables', 256.633,
            'OUT049', 1999, 'Medium', 'Tier 1', 'Supermarket Type1']
print(loaded_model.predict(np.array([args])), 2050.664)