Example #1
0
 def predict(self, X):
     '''
     Predict labels for (test) data X.

     Args:
         X: Test data matrix of dim num_test_samples * num_feat (numpy array).
     Returns:
         The predictions of the underlying estimator self.clf on X.

     Make sure that the predicted values are in the correct format for the scoring
     metric. For example, binary classification problems often expect predictions
     in the form of a discriminant value (if the area under the ROC curve is the metric)
     rather than predictions of the class labels themselves. For multi-class or multi-label
     problems, class probabilities are often expected if the metric is cross-entropy.
     Scikit-learn also has a function predict_proba; we do not require it.
     The function predict eventually can return probabilities.
     '''
     # NOTE(review): a fresh Preprocessor is fit on the *test* data and its
     # output is discarded, exactly as in the original code — the estimator
     # below still receives the raw X. Confirm this is intended (normally one
     # would reuse the preprocessor fitted during training and transform X).
     prepro = Preprocessor()
     prepro.pip0(10)
     prepro.fit_transform(X, y=None)

     num_test_samples = len(X)
     # Default to 1 feature so the dimension check below also works for 1-D
     # inputs (previously num_feat was undefined when X.ndim == 1 -> NameError).
     num_feat = 1
     if X.ndim > 1:
         num_feat = len(X[0])
     print("PREDICT: dim(X)= [{:d}, {:d}]".format(num_test_samples, num_feat))
     if self.num_feat != num_feat:
         print("ARRGH: number of features in X does not match training data!")
     print("PREDICT: dim(y)= [{:d}, {:d}]".format(num_test_samples, self.num_labels))
     return self.clf.predict(X)
Example #2
0
    def fit(self, X, y):
        '''
        Train the model parameters on (X, y).

        Args:
            X: Training data matrix of dim num_train_samples * num_feat.
            y: Training label matrix of dim num_train_samples * num_labels.
        Both inputs are numpy arrays.
        For classification, labels could be either numbers 0, 1, ... c-1 for c classes
        or one-hot encoded vectors of zeros, with a 1 at the kth position for class k.
        The AutoML format supports one-hot encoding, which also works for multi-label
        problems. Use data_converter.convert_to_num() to convert to the category
        number format. For regression, labels are continuous values.
        '''
        # NOTE(review): the preprocessor's output is discarded, so the model
        # below is trained on the raw X — confirm this is intended.
        prepro = Preprocessor()
        prepro.pip0(10)
        prepro.fit_transform(X, y)

        # Record data dimensions. Default the per-column counts to 1 so that
        # 1-D inputs do not leave num_feat/num_labels unset, which previously
        # made the prints below raise AttributeError unless __init__ happened
        # to define defaults.
        self.num_train_samples = len(X)
        self.num_feat = len(X[0]) if X.ndim > 1 else 1
        print("FIT: dim(X)= [{:d}, {:d}]".format(self.num_train_samples, self.num_feat))
        num_label_rows = len(y)
        self.num_labels = len(y[0]) if y.ndim > 1 else 1
        print("FIT: dim(y)= [{:d}, {:d}]".format(num_label_rows, self.num_labels))
        if self.num_train_samples != num_label_rows:
            print("ARRGH: number of samples in X and y do not match!")

        ###### Baseline models ######
        from sklearn.naive_bayes import GaussianNB
        from sklearn.linear_model import LinearRegression
        from sklearn.tree import DecisionTreeRegressor
        from sklearn.ensemble import RandomForestRegressor
        from sklearn.neighbors import KNeighborsRegressor
        from sklearn.svm import SVR
        # Comment and uncomment right lines in the following to choose the model
        #self.clf = GaussianNB()
        #self.clf = LinearRegression()
        #self.clf = DecisionTreeRegressor()
        #self.clf = RandomForestRegressor()
        #self.clf = KNeighborsRegressor()
        #self.clf = SVR(C=1.0, epsilon=0.2)
        # Only search hyperparameters once; subsequent fit() calls reuse the
        # already-selected estimator.
        if not self.is_trained:
            self.clf = self.selection_hyperparam(X, y)

        self.is_trained = True