示例#1
0
 def __init__(self,
              n_estimators=10,
              criterion="gini",
              max_depth=None,
              min_samples_split=2,
              min_samples_leaf=1,
              min_weight_fraction_leaf=0.,
              max_features="auto",
              max_leaf_nodes=None,
              min_impurity_decrease=0.,
              min_impurity_split=None,
              bootstrap=True,
              oob_score=False,
              n_jobs=1,
              random_state=None,
              verbose=0,
              warm_start=False,
              class_weight=None):
     """Initialise the wrapped random forest, then the wrapper base class.

     Every parameter is forwarded under its own name to
     ``_skRandomForestClassifier.__init__``; see that class for their
     meaning. The only value altered on the way is ``n_estimators``, which
     is coerced with ``int()`` first (presumably so float values produced
     by a parameter search are accepted -- confirm with callers).
     """
     n_estimators = int(n_estimators)
     # Forward by keyword rather than by position: a 17-argument positional
     # call silently mis-assigns values if the parent's parameter order ever
     # differs, and keyword-only parent signatures reject it outright.
     _skRandomForestClassifier.__init__(
         self,
         n_estimators=n_estimators,
         criterion=criterion,
         max_depth=max_depth,
         min_samples_split=min_samples_split,
         min_samples_leaf=min_samples_leaf,
         min_weight_fraction_leaf=min_weight_fraction_leaf,
         max_features=max_features,
         max_leaf_nodes=max_leaf_nodes,
         min_impurity_decrease=min_impurity_decrease,
         min_impurity_split=min_impurity_split,
         bootstrap=bootstrap,
         oob_score=oob_score,
         n_jobs=n_jobs,
         random_state=random_state,
         verbose=verbose,
         warm_start=warm_start,
         class_weight=class_weight)
     BaseWrapperClf.__init__(self)
示例#2
0
    def __init__(self, pmml, n_jobs=None):
        """Build the forest from the segments of a PMML ``MiningModel``.

        Parameters
        ----------
        pmml
            Passed straight to ``PMMLBaseClassifier.__init__``; presumably a
            path or file-like PMML document -- confirm against the base class.
        n_jobs : int or None
            Forwarded to ``RandomForestClassifier.__init__``.

        Raises
        ------
        Exception
            If the document has no ``MiningModel`` element, no
            ``Segmentation`` element, or a ``multipleModelMethod`` other
            than ``majorityVote`` / ``average``.
        """
        PMMLBaseClassifier.__init__(self, pmml)

        mining_model = self.root.find('MiningModel')
        if mining_model is None:
            raise Exception('PMML model does not contain MiningModel.')

        segmentation = mining_model.find('Segmentation')
        if segmentation is None:
            raise Exception('PMML model does not contain Segmentation.')

        supported_methods = ['majorityVote', 'average']
        if segmentation.get('multipleModelMethod') not in supported_methods:
            raise Exception(
                'PMML model ensemble should use majority vote or average.')

        # Only segments carrying a <True/> predicate are turned into trees;
        # the rest are counted and reported.
        all_segments = segmentation.findall('Segment')
        usable_segments = []
        for candidate in all_segments:
            if candidate.find('True') is not None:
                usable_segments.append(candidate)

        n_skipped = len(all_segments) - len(usable_segments)
        if n_skipped > 0:
            warnings.warn(
                'Warning: {} segment(s) ignored because of unsupported predicate.'
                .format(n_skipped))

        RandomForestClassifier.__init__(self,
                                        n_estimators=len(usable_segments),
                                        n_jobs=n_jobs)
        self._validate_estimator()

        # Unfitted estimator that mirrors this forest's fitted metadata; it
        # is stored before the trees are built.
        template = self._make_estimator(append=False, random_state=123)
        for attr in ('classes_', 'n_features_', 'n_outputs_', 'n_classes_'):
            setattr(template, attr, getattr(self, attr))
        self.template_estimator = template

        self.estimators_ = [
            self.get_tree(segment) for segment in usable_segments
        ]

        # Category counts are computed only now, after the trees exist,
        # because categories may be inferred while the segments are parsed.
        target_name = self.target_field.get('name')
        feature_fields = [
            field for name, field in self.fields.items()
            if name != target_name
        ]
        for tree in self.estimators_:
            counts = []
            for field in feature_fields:
                if field.tag != 'DataField':
                    continue
                if field.get('optype') == 'categorical':
                    mapping = self.field_mapping[field.get('name')]
                    counts.append(len(mapping[1].categories))
                else:
                    # -1 marks a non-categorical field.
                    counts.append(-1)
            n_categories = np.asarray(counts, dtype=np.int32, order='C')
            tree.n_categories = n_categories
            tree.tree_.set_n_categories(n_categories)
示例#3
0
 def __init__(self, colspec, random_state=4321, n_estimators=200):
     """Set up the forest and the column specification.

     colspec - mapping of keyword arguments; it is unpacked into
               vessel_scoring.colspec.Colspec and the resulting object
               is stored on self.colspec
     See RandomForestClassifier docs for other parameters.
     """
     forest_options = {
         'n_estimators': n_estimators,
         'random_state': random_state,
     }
     RandomForestClassifier.__init__(self, **forest_options)
     self.colspec = vessel_scoring.colspec.Colspec(**colspec)
示例#4
0
 def __init__(self):
     """Configure the forest with this class's fixed settings and a label."""
     fixed_settings = dict(
         # Forest size and split rule
         n_estimators=100,
         criterion='gini',
         max_features='auto',
         # Tree growth limits
         max_depth=None,
         min_samples_split=10,
         min_samples_leaf=1,
         # Evaluation, parallelism and reproducibility
         oob_score=True,
         n_jobs=-1,
         random_state=1,
     )
     RandomForestClassifier.__init__(self, **fixed_settings)

     self.name = 'Random forest classifier'
示例#5
0
    def __init__(self):
        """Initialise the forest with the settings hard-coded below."""
        # Toggle settings here
        forest_settings = dict(
            n_estimators=250,
            criterion='gini',
            max_features='log2',
            max_depth=None,
            min_samples_leaf=1,
            min_weight_fraction_leaf=0,
            max_leaf_nodes=None,
            min_impurity_decrease=0,
            bootstrap=True,
            random_state=None,
            verbose=0,
            warm_start=False,
            class_weight='balanced',
        )
        RandomForestClassifier.__init__(self, **forest_settings)

        # Double-underscore attribute: Python name-mangles it with the
        # enclosing class name, so it is private to this class.
        self.__name = 'RFC'
 def __init__(self,
              imbalance_upsampling=None,
              class_weight=None,
              method=None,
              log=None):
     """Initialise the shared model base, then the random forest itself.

     All four arguments are handed to ``MlModelCommon.__init__``;
     ``class_weight`` is additionally passed to
     ``RandomForestClassifier.__init__``, which is called with
     ``n_estimators=100`` and ``random_state=99``.
     """
     common_options = dict(imbalance_upsampling=imbalance_upsampling,
                           class_weight=class_weight,
                           method=method,
                           log=log)
     MlModelCommon.__init__(self, **common_options)

     # A random forest is already a bagging ensemble of decision trees, so
     # layering another ensemble method on top might not make sense; the
     # option is switched off.
     self.ensemble_method = None

     RandomForestClassifier.__init__(self,
                                     n_estimators=100,
                                     random_state=99,
                                     class_weight=class_weight)
 def __init__(self, k=3000):
     """Initialise a 40-tree gini forest and optionally set ``K_features``.

     k - when not None, stored on the class attribute
         ``MutableRandomForestClassifier.K_features``. Because it is set on
         the class, the value is shared by every instance, not only this
         one. Passing None leaves the current class-level value untouched.
     """
     # Identity test: `!= None` would dispatch to a custom `__ne__` (e.g. on
     # array-like values) instead of checking for the None singleton.
     if k is not None:
         MutableRandomForestClassifier.K_features = k
     RandomForestClassifier.__init__(self, n_estimators=40, criterion='gini')
示例#8
0
 def __init__(self, **kwargs):
     """Initialise the forest and the per-class bookkeeping attributes.

     kwargs - keyword arguments forwarded to
              ``RandomForestClassifier.__init__``.
     """
     # Unpack the keyword arguments. Passing the dict itself would bind it
     # to the parent's first positional parameter instead of applying the
     # individual settings.
     RandomForestClassifier.__init__(self, **kwargs)
     # Separate forest built with bootstrap disabled; note it does not
     # receive `kwargs`.
     self.base_clf = RandomForestClassifier(bootstrap=False)
     self.clfs = {}
     self.unique_class = None
示例#9
0
 def __init__(self, n_estimators, countclf):
     """Initialise the forest and remember the companion ``countclf``.

     n_estimators - forwarded to ``RandomForestClassifier.__init__``
     countclf     - stored unchanged on ``self.countclf``
     """
     RandomForestClassifier.__init__(
         self,
         n_estimators=n_estimators,
     )
     self.countclf = countclf
示例#10
0
# Load the training and test data into pandas dataframes
train_df = pd.read_csv('train.csv',sep=",")
test_df = pd.read_csv('test.csv',sep=",")

train_data = munge_rest(train_df)
test_data = munge_rest(test_df)

# Split the training data: column 37 is the revenue target, everything else
# is the feature matrix
x = np.delete(train_data,37,1)
revenue = train_data[:,37]

# --- TRAINING ---
# Create the random forest object with all the parameters for the fit.
# Both settings go into one constructor call: calling
# `forest.__init__(oob_score=True)` afterwards would re-initialise the
# estimator and reset n_estimators to its default.
forest = RandomForestClassifier(n_estimators=100, oob_score=True)
# Fit the training data to the revenue and create the decision trees
forest = forest.fit(x,revenue)

# NOTE: forest.score(x, revenue) is the accuracy on the training data, not
# the out-of-bag score; the OOB estimate is exposed as `forest.oob_score_`.
#print forest.score(x,revenue)

#Compare elastic net and random forest
# NOTE(review): `SVM` is an ElasticNet instance and is never fitted or used
# below -- confirm whether the comparison was meant to be plotted.
SVM = ElasticNet(alpha = .05,normalize=True)
RF = RandomForestClassifier(n_estimators = 50)
y_rbf = RF.fit(x, revenue).predict(x)
# NOTE(review): the scatter uses feature column 1 while the line uses column
# 7, and the title/labels mention SVR/RBF although the model is a random
# forest -- left unchanged pending confirmation of the intended figure.
plt.scatter(x[:,1], revenue, c='k', label='data')
plt.plot(x[:,7], y_rbf, c='b', label='RBF model')
plt.title('Support Vector Regression')
plt.legend()
plt.show()
示例#11
0
    def __init__(self,
                 featuredat='noisy_features.txt',
                 metadata='Data_Batch_TDA3_all.txt',
                 n_estimators=1000,
                 max_features=3,
                 min_samples_split=2):
        """Load the data, build the groups array and train the classifier.

        The class is a subclass of scikit-learn's RandomForestClassifier
        with extra TESS-simulation-specific preparation and plotting
        functions. Construction is not cheap: it reads both input files and
        trains the forest before returning.

        Parameters
        ----------
        featuredat : str
            Filepath leading to the features file, output from the FeatCalc
            class.
        metadata : str
            Filepath leading to the comma-delimited metadata file,
            e.g. Data_Batch1_noisy.txt. Column 0 is read as the ids and
            column 10 as the types.
        n_estimators : int
            Number of trees in the forest. See sklearn docs.
        max_features : int
            Max features per tree. See sklearn docs.
        min_samples_split : int
            Min samples required to split a node. See sklearn docs.

        Returns
        -------
        Nothing, but the groups array is set up (only entries whose group
        is greater than 0 are kept, in both ``groups`` and ``features``)
        and the classifier is trained.

        Examples
        --------
        A typical use of the class/function would follow::

            B = Classifier()
            class_probs = B(unclassified_features_array)

        For testing using the training data::

            B = Classifier()
            B.crossValidate()   # may take some time - retrains the
                                # classifier for each training set member
            B.makeConfMatrix()
            B.plotConfMatrix()

        At this point a confusion matrix will be plotted. Cross-validated
        class probabilities will be available in ``B.cvprobs``.
        """
        # Initialise the underlying random forest with balanced class weights.
        forest_options = dict(n_estimators=n_estimators,
                              max_features=max_features,
                              min_samples_split=min_samples_split,
                              class_weight='balanced')
        RandomForestClassifier.__init__(self, **forest_options)

        print('Loading Data')
        self.metadata = np.genfromtxt(metadata, delimiter=',', dtype=None)
        # With dtype=None the columns are addressed by field name: f0, f1, ...
        self.metaids = self.metadata['f0'].astype('unicode')
        self.types = self.metadata['f10'].astype('unicode')
        self.features = np.genfromtxt(featuredat)
        print('Loaded')

        # Result slots, empty until something fills them in.
        self.cvprobs = None
        self.cfmatrix = None

        print('Setting up groups array')
        self.groups = self.defineGroups()
        # Keep only the entries assigned to a positive group.
        keep_mask = self.groups > 0
        self.groups = self.groups[keep_mask]
        self.features = self.features[keep_mask]
        print('Complete')

        print('Training Classifier')
        self.trainRF()
        print('Classifier trained')
示例#12
0
 def __init__(self, threshold=1, ll_ranking=False, **kwargs):
     """Initialise both base classes.

     threshold, ll_ranking - forwarded to ``BaseClassifier.__init__``
     kwargs                - forwarded to ``RF.__init__``
     """
     RF.__init__(self, **kwargs)
     BaseClassifier.__init__(
         self,
         threshold=threshold,
         ll_ranking=ll_ranking,
     )