def __init__(self, n_estimators=10, criterion="gini", max_depth=None,
             min_samples_split=2, min_samples_leaf=1,
             min_weight_fraction_leaf=0., max_features="auto",
             max_leaf_nodes=None, min_impurity_decrease=0.,
             min_impurity_split=None, bootstrap=True, oob_score=False,
             n_jobs=1, random_state=None, verbose=0, warm_start=False,
             class_weight=None):
    """Initialise the wrapped random forest and the wrapper base class.

    Every argument is forwarded unchanged to
    ``_skRandomForestClassifier.__init__``, except ``n_estimators`` which
    is first coerced with ``int()``. ``BaseWrapperClf.__init__`` is then
    called with no extra arguments.
    """
    n_estimators = int(n_estimators)
    # Forward by keyword rather than by position: a 17-argument positional
    # call silently mis-binds values if the parent signature is reordered,
    # and recent scikit-learn releases reject positional hyper-parameters.
    _skRandomForestClassifier.__init__(
        self,
        n_estimators=n_estimators,
        criterion=criterion,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        min_weight_fraction_leaf=min_weight_fraction_leaf,
        max_features=max_features,
        max_leaf_nodes=max_leaf_nodes,
        min_impurity_decrease=min_impurity_decrease,
        min_impurity_split=min_impurity_split,
        bootstrap=bootstrap,
        oob_score=oob_score,
        n_jobs=n_jobs,
        random_state=random_state,
        verbose=verbose,
        warm_start=warm_start,
        class_weight=class_weight,
    )
    BaseWrapperClf.__init__(self)
def __init__(self, pmml, n_jobs=None):
    """Build the forest from the segments of a PMML ``MiningModel``.

    Parameters
    ----------
    pmml :
        Passed straight to ``PMMLBaseClassifier.__init__``.
    n_jobs :
        Forwarded to ``RandomForestClassifier.__init__``.

    Raises
    ------
    Exception
        If the document has no ``MiningModel``, the mining model has no
        ``Segmentation``, or the segmentation's ``multipleModelMethod`` is
        neither ``majorityVote`` nor ``average``.

    Segments without a ``True`` predicate child are skipped and a warning
    reporting how many were skipped is issued.
    """
    PMMLBaseClassifier.__init__(self, pmml)

    mining_model = self.root.find('MiningModel')
    if mining_model is None:
        raise Exception('PMML model does not contain MiningModel.')

    segmentation = mining_model.find('Segmentation')
    if segmentation is None:
        raise Exception('PMML model does not contain Segmentation.')

    supported_methods = ('majorityVote', 'average')
    if segmentation.get('multipleModelMethod') not in supported_methods:
        raise Exception(
            'PMML model ensemble should use majority vote or average.')

    # Parse segments, keeping only those that carry a <True/> predicate.
    all_segments = segmentation.findall('Segment')
    usable_segments = []
    for candidate in all_segments:
        if candidate.find('True') is not None:
            usable_segments.append(candidate)

    skipped = len(all_segments) - len(usable_segments)
    if skipped > 0:
        warnings.warn(
            'Warning: {} segment(s) ignored because of unsupported predicate.'
            .format(skipped))

    RandomForestClassifier.__init__(
        self, n_estimators=len(usable_segments), n_jobs=n_jobs)
    self._validate_estimator()

    # Template estimator carrying this forest's class/feature metadata.
    template = self._make_estimator(append=False, random_state=123)
    template.classes_ = self.classes_
    template.n_features_ = self.n_features_
    template.n_outputs_ = self.n_outputs_
    template.n_classes_ = self.n_classes_
    self.template_estimator = template

    self.estimators_ = [
        self.get_tree(segment) for segment in usable_segments
    ]

    # Required after constructing trees, because categories may be inferred
    # in the parsing process.
    target_name = self.target_field.get('name')
    feature_fields = [
        field for name, field in self.fields.items() if name != target_name
    ]
    for tree in self.estimators_:
        # One entry per DataField: the category count for categorical
        # fields, -1 for everything else.
        category_counts = []
        for field in feature_fields:
            if field.tag != 'DataField':
                continue
            if field.get('optype') == 'categorical':
                mapping = self.field_mapping[field.get('name')]
                category_counts.append(len(mapping[1].categories))
            else:
                category_counts.append(-1)
        n_categories = np.asarray(category_counts, dtype=np.int32, order='C')
        tree.n_categories = n_categories
        tree.tree_.set_n_categories(n_categories)
def __init__(self, colspec, random_state=4321, n_estimators=200):
    """
    colspec - mapping of keyword arguments expanded into
        vessel_scoring.colspec.Colspec; the resulting object is stored
        on self.colspec
    random_state, n_estimators - forwarded to RandomForestClassifier

    See RandomForestClassifier docs for other parameters.
    """
    RandomForestClassifier.__init__(
        self,
        random_state=random_state,
        n_estimators=n_estimators,
    )
    # NOTE(review): self.colspec holds the built Colspec object, not the
    # mapping that was passed in - confirm nothing relies on reading the
    # original argument back from the instance.
    spec = vessel_scoring.colspec.Colspec(**colspec)
    self.colspec = spec
def __init__(self):
    """Configure the forest with fixed hyper-parameters and a display name.

    Takes no arguments: the settings below are hard-coded, and
    ``self.name`` is set to a human-readable label.
    """
    RandomForestClassifier.__init__(
        self,
        n_estimators=100,
        criterion='gini',
        max_depth=None,
        min_samples_split=10,
        min_samples_leaf=1,
        # 'sqrt' is what 'auto' meant for forest classifiers; the 'auto'
        # alias was deprecated and later removed from scikit-learn, so
        # spell the setting out to keep working across versions.
        max_features='sqrt',
        oob_score=True,
        n_jobs=-1,
        random_state=1,
    )
    self.name = 'Random forest classifier'
def __init__(self):
    """Create the forest with the hard-coded settings listed below."""
    # Toggle settings here
    settings = dict(
        n_estimators=250,
        criterion='gini',
        max_features='log2',
        max_depth=None,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0,
        max_leaf_nodes=None,
        min_impurity_decrease=0,
        bootstrap=True,
        random_state=None,
        verbose=0,
        warm_start=False,
        class_weight='balanced',
    )
    RandomForestClassifier.__init__(self, **settings)
    # Double-underscore attribute: Python name-mangles it with the
    # enclosing class name, so it is only reachable as `__name` from
    # inside that class.
    self.__name = 'RFC'
def __init__(self, imbalance_upsampling=None, class_weight=None,
             method=None, log=None):
    """Initialise both base classes of this random forest model.

    All four arguments are handed to ``MlModelCommon.__init__``;
    ``class_weight`` is additionally forwarded to
    ``RandomForestClassifier.__init__``, which is always created with
    100 trees and ``random_state=99``.
    """
    common_options = dict(
        imbalance_upsampling=imbalance_upsampling,
        class_weight=class_weight,
        method=method,
        log=log,
    )
    MlModelCommon.__init__(self, **common_options)

    # A random forest is already a bagging ensemble of decision trees, so
    # layering another ensemble method on top might not make sense.
    self.ensemble_method = None

    RandomForestClassifier.__init__(
        self,
        n_estimators=100,
        random_state=99,
        class_weight=class_weight,
    )
def __init__(self, k=3000):
    """Create a 40-tree Gini forest, optionally updating ``K_features``.

    Parameters
    ----------
    k :
        When not ``None``, stored on the class attribute
        ``MutableRandomForestClassifier.K_features``. This is class-level
        state, so the new value is shared by every instance. Pass ``None``
        to leave the current class value untouched.
    """
    # Identity test: `!= None` dispatches to __ne__, which is not a
    # reliable "was a value supplied" check for arbitrary objects.
    if k is not None:
        MutableRandomForestClassifier.K_features = k
    RandomForestClassifier.__init__(self, n_estimators=40, criterion='gini')
def __init__(self, **kwargs):
    """Initialise the forest from keyword arguments and set up bookkeeping.

    Parameters
    ----------
    **kwargs :
        Forwarded as keyword arguments to
        ``RandomForestClassifier.__init__``.

    Also creates ``base_clf`` (a ``RandomForestClassifier`` with
    ``bootstrap=False``), an empty ``clfs`` dict and ``unique_class``
    set to ``None``.
    """
    # Unpack the options: passing the dict itself positionally would bind
    # the whole dict to the first parameter (n_estimators).
    RandomForestClassifier.__init__(self, **kwargs)
    self.base_clf = RandomForestClassifier(bootstrap=False)
    self.clfs = {}
    self.unique_class = None
def __init__(self, n_estimators, countclf):
    """Create the forest and remember the companion ``countclf`` object.

    Parameters
    ----------
    n_estimators :
        Forwarded to ``RandomForestClassifier.__init__``.
    countclf :
        Stored unchanged on ``self.countclf``.
    """
    forest_options = {'n_estimators': n_estimators}
    RandomForestClassifier.__init__(self, **forest_options)
    self.countclf = countclf
# Load the training and test data into initial pandas dataframes.
train_df = pd.read_csv('train.csv', sep=",")
test_df = pd.read_csv('test.csv', sep=",")

train_data = munge_rest(train_df)
test_data = munge_rest(test_df)

# Split column 37 (revenue) off from the training features.
x = np.delete(train_data, 37, 1)
revenue = train_data[:, 37]

# --- TRAINING ---
# Create the random forest object with all the parameters for the fit.
# Both settings go in one constructor call: calling forest.__init__ again
# afterwards would reset n_estimators back to its default.
forest = RandomForestClassifier(n_estimators=100, oob_score=True)

# Fit the training data to the revenue and create the decision trees.
forest = forest.fit(x, revenue)

# With oob_score=True the out-of-bag estimate is available as
# forest.oob_score_ after fitting; forest.score(x, revenue) would report
# accuracy on the training data instead.

# Compare elastic net and random forest.
# NOTE(review): SVM is created but never fitted or used in the visible
# code, and it is an ElasticNet rather than an SVM - confirm intent.
SVM = ElasticNet(alpha=.05, normalize=True)
RF = RandomForestClassifier(n_estimators=50)
y_rbf = RF.fit(x, revenue).predict(x)

# NOTE(review): the scatter uses feature column 1 while the line uses
# feature column 7, and the label/title mention RBF / Support Vector
# Regression although the model is a random forest - confirm intent.
plt.scatter(x[:, 1], revenue, c='k', label='data')
plt.plot(x[:, 7], y_rbf, c='b', label='RBF model')
plt.title('Support Vector Regression')
plt.legend()
plt.show()
def __init__(self, featuredat='noisy_features.txt',
             metadata='Data_Batch_TDA3_all.txt', n_estimators=1000,
             max_features=3, min_samples_split=2):
    """
    Initialiser for classifier class.

    Subclass of scikit-learn's RandomForestClassifier with extra TESS
    simulation specific preparation and plotting functions. Construction
    loads the data files, builds the groups array and trains the forest.

    Parameters
    ----------------
    featuredat: str
        Filepath leading to the features file, output from the FeatCalc
        class.
    metadata: str
        Filepath leading to the metadata file, e.g. Data_Batch1_noisy.txt.
        Read as comma-delimited; columns 'f0' and 'f10' are kept as
        self.metaids and self.types.
    n_estimators: int
        Number of trees in the forest. See sklearn docs.
    max_features: int
        Max features per tree. See sklearn docs.
    min_samples_split: int
        Min samples required to split a node. See sklearn docs.

    Returns
    -----------------
    NA, but will set up the groups array and train the classifier.

    Examples
    -----------------
    A typical use of the class/function would follow:
        B = Classifier()
        class_probs = B(unclassified_features_array)

    For testing using the training data:
        B = Classifier()
        B.crossValidate()  # may take some time - retrains the classifier
                           # for each training set member
        B.makeConfMatrix()
        B.plotConfMatrix()
    At this point a confusion matrix will be plotted. Cross-validated
    class probabilities will be available in B.cvprobs.
    """
    # initialise default RF
    RandomForestClassifier.__init__(
        self,
        class_weight='balanced',
        min_samples_split=min_samples_split,
        max_features=max_features,
        n_estimators=n_estimators,
    )

    print('Loading Data')
    meta_table = np.genfromtxt(metadata, delimiter=',', dtype=None)
    self.metadata = meta_table
    self.metaids = meta_table['f0'].astype('unicode')
    self.types = meta_table['f10'].astype('unicode')
    self.features = np.genfromtxt(featuredat)
    print('Loaded')

    # Filled in later; nothing has been cross-validated yet.
    self.cvprobs = None
    self.cfmatrix = None

    print('Setting up groups array')
    all_groups = self.defineGroups()
    # Keep only the rows whose group value is positive, in both arrays.
    keep = all_groups > 0
    self.groups = all_groups[keep]
    self.features = self.features[keep]
    print('Complete')

    print('Training Classifier')
    self.trainRF()
    print('Classifier trained')
def __init__(self, threshold=1, ll_ranking=False, **kwargs):
    """Initialise the random forest part, then the ``BaseClassifier`` part.

    Parameters
    ----------
    threshold, ll_ranking :
        Forwarded by keyword to ``BaseClassifier.__init__``.
    **kwargs :
        Forwarded unchanged to ``RF.__init__``.
    """
    RF.__init__(self, **kwargs)
    base_options = {'threshold': threshold, 'll_ranking': ll_ranking}
    BaseClassifier.__init__(self, **base_options)