def __init__(self, n_estimators=10, criterion='mse', max_depth=None,
             min_samples_split=2, min_samples_leaf=1, max_features='auto',
             max_leaf_nodes=None, bootstrap=True, oob_score=False, n_jobs=1,
             random_state=None, verbose=0, min_density=None,
             compute_importances=None):
    """Initialise the forest by handing the hyper-parameters to the base class.

    Every argument except ``min_density`` is forwarded by keyword to
    ``RandomForestRegressor.__init__``. ``min_density`` is accepted in the
    signature but is neither forwarded nor stored by this method.
    """
    # Collect the forwarded settings in one place before the base call.
    forwarded = dict(
        n_estimators=n_estimators,
        criterion=criterion,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        max_leaf_nodes=max_leaf_nodes,
        bootstrap=bootstrap,
        compute_importances=compute_importances,
        oob_score=oob_score,
        n_jobs=n_jobs,
        random_state=random_state,
        verbose=verbose,
    )
    RandomForestRegressor.__init__(self, **forwarded)
def __init__(self, n_estimators=10, criterion="mse", max_depth=None,
             min_samples_split=2, min_samples_leaf=1,
             min_weight_fraction_leaf=0., max_features="auto",
             max_leaf_nodes=None, min_impurity_decrease=0.,
             min_impurity_split=None, bootstrap=True, oob_score=False,
             n_jobs=1, random_state=None, verbose=0, warm_start=False):
    """Initialise the wrapped forest and then the ``BaseWrapperReg`` part.

    The hyper-parameters are passed positionally, in signature order, to
    ``_RandomForestRegressor.__init__``; ``BaseWrapperReg.__init__`` is
    called afterwards with no extra arguments.

    ``n_estimators`` is coerced with ``int()`` before being forwarded.
    """
    # NOTE(review): the caller-supplied n_jobs is discarded here; the base
    # class always receives 4. Confirm this override is intentional.
    n_jobs = 4
    n_estimators = int(n_estimators)

    # Order matters: the base initialiser receives these positionally.
    base_args = (
        n_estimators,
        criterion,
        max_depth,
        min_samples_split,
        min_samples_leaf,
        min_weight_fraction_leaf,
        max_features,
        max_leaf_nodes,
        min_impurity_decrease,
        min_impurity_split,
        bootstrap,
        oob_score,
        n_jobs,
        random_state,
        verbose,
        warm_start,
    )
    _RandomForestRegressor.__init__(self, *base_args)
    BaseWrapperReg.__init__(self)
def __init__(self, n_estimators=10, random_state=1, X_train=None,
             Y_train=None, data=None):
    """Initialise the forest, keep the data references, and create ``./model``.

    Args:
        n_estimators (int, optional): Number of decision trees.
            Defaults to 10.
        random_state (int, optional): Seed for the random number
            generator. Defaults to 1.
        X_train (df, optional): Flattened input data.
        Y_train (df, optional): Output data.
        data (df, optional): Non-flattened input data.

    Raises:
        OSError: If the ``./model`` directory cannot be created (including
            when ``./model`` already exists but is not a directory).
    """
    RandomForestRegressor.__init__(self, n_estimators=n_estimators,
                                   random_state=random_state)
    self.X_train = X_train
    self.Y_train = Y_train
    self.data = data

    # exist_ok=True replaces the former exists()-then-makedirs() pair, which
    # could fail if another process created the directory between the check
    # and the creation.
    os.makedirs("./model", exist_ok=True)
def __init__(
    self,
    n_estimators=10,
    criterion='mse',
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='auto',
    max_leaf_nodes=None,
    bootstrap=True,
    oob_score=False,
    n_jobs=1,
    random_state=None,
    verbose=0,
    min_density=None,
    compute_importances=None,
):
    """Set up the regressor by delegating to ``RandomForestRegressor.__init__``.

    All arguments are forwarded by keyword except ``min_density``, which this
    method accepts but does not pass on or store.
    """
    RandomForestRegressor.__init__(
        self,
        n_estimators=n_estimators,
        criterion=criterion,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        max_leaf_nodes=max_leaf_nodes,
        bootstrap=bootstrap,
        compute_importances=compute_importances,
        oob_score=oob_score,
        n_jobs=n_jobs,
        random_state=random_state,
        verbose=verbose,
    )
def __init__(self, k=1000, var_threshold=0, **kwargs):
    """Initialise the forest together with a ``SelectKBest`` filter.

    Parameters
    ----------
    k
        Stored as ``self.k`` and passed as ``k`` to ``SelectKBest``.
    var_threshold
        Stored as ``self.var_threshold``; not otherwise used here.
    **kwargs
        Forwarded unchanged to ``RandomForestRegressor.__init__``.
    """
    self.k = k
    RandomForestRegressor.__init__(self, **kwargs)

    # The selector scores features with f_regression and keeps k of them.
    selector = SelectKBest(k=k, score_func=f_regression)
    self.filter = selector
    self.var_threshold = var_threshold
def __init__(self, reserved_columns=None, **kwargs):
    """Initialise the forest and the column bookkeeping attributes.

    Parameters
    ----------
    reserved_columns : list, optional
        Stored as ``self.reserved_columns``. When omitted (or ``None``) a
        fresh empty list is created for this instance.
    **kwargs
        Forwarded unchanged to ``RandomForestRegressor.__init__``.
    """
    # A literal ``[]`` default would be one list object shared by every
    # instance constructed without the argument, so mutations on one
    # estimator would leak into the others. A caller-supplied list is
    # stored as-is (no copy) so the attribute remains the object passed in.
    if reserved_columns is None:
        reserved_columns = []
    self.reserved_columns = reserved_columns
    RandomForestRegressor.__init__(self, **kwargs)
    self.feature_names = []
def __init__(self, pmml, n_jobs=None):
    """Build the forest from the segments of a PMML ``MiningModel``.

    Parameters
    ----------
    pmml
        Passed unchanged to ``PMMLBaseRegressor.__init__``.
    n_jobs
        Forwarded to ``RandomForestRegressor.__init__``.

    Raises
    ------
    Exception
        If the document has no ``MiningModel`` element, the mining model has
        no ``Segmentation`` element, or the segmentation's
        ``multipleModelMethod`` is neither ``'majorityVote'`` nor
        ``'average'``.
    """
    PMMLBaseRegressor.__init__(self, pmml)

    model_node = self.root.find('MiningModel')
    if model_node is None:
        raise Exception('PMML model does not contain MiningModel.')

    segmentation_node = model_node.find('Segmentation')
    if segmentation_node is None:
        raise Exception('PMML model does not contain Segmentation.')

    combine_method = segmentation_node.get('multipleModelMethod')
    if combine_method not in ('majorityVote', 'average'):
        raise Exception(
            'PMML model ensemble should use majority vote or average.')

    # Only segments that have a 'True' child element are used; the number
    # of dropped segments is reported through a warning.
    all_segments = segmentation_node.findall('Segment')
    usable_segments = []
    for candidate in all_segments:
        if candidate.find('True') is not None:
            usable_segments.append(candidate)

    skipped = len(all_segments) - len(usable_segments)
    if skipped > 0:
        warnings.warn(
            'Warning: {} segment(s) ignored because of unsupported predicate.'
            .format(skipped))

    self.n_outputs_ = 1
    RandomForestRegressor.__init__(
        self, n_estimators=len(usable_segments), n_jobs=n_jobs)
    self._validate_estimator()

    # Template estimator: prefer n_features_in_, fall back to n_features_
    # when the former raises AttributeError.
    template = self._make_estimator(append=False, random_state=123)
    try:
        template.n_features_in_ = self.n_features_in_
    except AttributeError:
        template.n_features_ = self.n_features_
    template.n_outputs_ = self.n_outputs_
    self.template_estimator = template

    self.estimators_ = [
        get_tree(self, node, rescale_factor=0.1) for node in usable_segments
    ]

    # Category counts must be gathered after the trees are constructed,
    # because categories may be inferred during parsing.
    target_name = self.target_field.get('name')
    feature_fields = [
        fld for key, fld in self.fields.items() if key != target_name
    ]
    for tree in self.estimators_:
        # Rebuilt for every tree so each one receives its own array.
        counts = []
        for fld in feature_fields:
            if fld.tag != 'DataField':
                continue
            if fld.get('optype') == 'categorical':
                mapping = self.field_mapping[fld.get('name')]
                counts.append(len(mapping[1].categories))
            else:
                counts.append(-1)
        category_counts = np.asarray(counts, dtype=np.int32, order='C')
        tree.n_categories = category_counts
        tree.tree_.set_n_categories(category_counts)

    # -1 marks a non-categorical feature in the per-tree count array.
    self.categorical = [
        count != -1 for count in self.estimators_[0].n_categories
    ]
def __init__(self, **kwargs):
    """Initialise the base forest and start with empty per-leaf containers.

    All keyword arguments are forwarded to ``RandomForestRegressor.__init__``;
    ``tree_means_per_leaf`` and ``tree_vars_per_leaf`` are then set to two
    separate empty lists.
    """
    RandomForestRegressor.__init__(self, **kwargs)
    self.tree_means_per_leaf, self.tree_vars_per_leaf = [], []
def __init__(self):
    """Initialise both bases from the ``RandomForest`` class-level settings.

    ``RandomForestRegressor.__init__`` receives ``RandomForest.n_est`` as the
    number of estimators with ``warm_start`` enabled;
    ``OnlineScorer.__init__`` receives ``RandomForest.batch`` as its batch
    size.
    """
    forest_options = {
        'n_estimators': RandomForest.n_est,
        'warm_start': True,
    }
    RandomForestRegressor.__init__(self, **forest_options)
    OnlineScorer.__init__(self, batch_size=RandomForest.batch)
def __init__(self, *args, **kargs):
    """Initialise the base forest and allocate a buffer for the weight.

    Positional and keyword arguments are forwarded unchanged to
    ``RandomForestRegressor.__init__``. ``self.weight`` is a one-element
    float32 array holding 1, and ``self.weight_gpu`` is the result of
    ``cuda.mem_alloc`` for that array's byte size. Nothing is copied into
    the allocated buffer here.
    """
    RandomForestRegressor.__init__(self, *args, **kargs)

    unit_weight = np.array([1], dtype=np.float32)
    self.weight = unit_weight
    self.weight_gpu = cuda.mem_alloc(unit_weight.nbytes)
train_data = munge_rest(train_df)
test_data = munge_rest(test_df)

# Column 37 is the revenue target: drop it from the features and keep it
# separately as the regression target.
x = np.delete(train_data, 37, 1)
revenue = train_data[:, 37]

# TRAINING
# Create the random forest with every parameter for the fit, including
# oob_score. These must all be given in the constructor call: invoking
# forest.__init__(oob_score=True) afterwards re-runs the constructor and
# resets n_estimators, max_features and min_samples_split to their defaults.
forest = RandomForestRegressor(n_estimators=100, max_depth=None,
                               max_features='sqrt', min_samples_split=3,
                               oob_score=True)

# Fit the training data to the revenue and create the decision trees.
forest = forest.fit(x, revenue)

# Try the Ridge regression model on the same features and target.
clf = linear_model.Ridge(alpha=0.75, fit_intercept=True, normalize=True,
                         copy_X=True, max_iter=1000, tol=0.015)
Ridge = clf.fit(x, revenue)

# The out-of-bag score is available as forest.oob_score_ after fitting;
# forest.score(X, y) would instead score the supplied data.