import numpy as np

import feature_extractor
import regressor


def train_model(X_df, y_array, skf_is):
    # Fit the feature extractor on the full data set.
    fe = feature_extractor.FeatureExtractor()
    fe.fit(X_df, y_array)
    X_array = fe.transform(X_df)
    # Regression: train only on the fold's training indices.
    train_is, _ = skf_is
    X_train_array = np.array([X_array[i] for i in train_is])
    y_train_array = np.array([y_array[i] for i in train_is])
    reg = regressor.Regressor()
    reg.fit(X_train_array, y_train_array)
    return fe, reg
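A minimal usage sketch, assuming the feature_extractor and regressor submission modules above: each skf_is element is a (train_indices, test_indices) pair such as those produced by a scikit-learn splitter.

from sklearn.model_selection import ShuffleSplit

cv = ShuffleSplit(n_splits=2, test_size=0.2, random_state=57)
for skf_is in cv.split(X_df, y_array):
    # The extractor is fit on all rows, the regressor only on the
    # fold's training rows.
    fe, reg = train_model(X_df, y_array, skf_is)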
Example #2
def train_submission(module_path, X_df, y_array, train_is):
    # Prepare the training set
    X_train_df = X_df.iloc[train_is]
    y_train_array = y_array[train_is]

    # Feature extraction
    import feature_extractor
    fe = feature_extractor.FeatureExtractor()
    fe.fit(X_train_df, y_train_array)
    X_train_array = fe.transform(X_train_df)

    import regressor
    reg = regressor.Regressor()
    reg.fit(X_train_array, y_train_array)
    return fe, reg
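Note: module_path is accepted but unused in this variant; in RAMP-style testing harnesses it typically points at the submission directory from which the feature_extractor and regressor modules are loaded.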
Example #3
import sys

import pandas as pd

import regressor as rg

# process_data and convert_to_zero are assumed to be helper functions
# defined elsewhere in this module.


def main(argv):

	df_train = pd.read_csv('data/train.csv')
	df_test = pd.read_csv('data/test.csv')
	df_answer = pd.DataFrame()

	df_train, df_test, df_answer = process_data(df_train, df_test, df_answer)

	# Target column to predict.
	label = df_train['NU_NOTA_MT']

	df_train.drop(['NU_NOTA_MT'], axis=1, inplace=True)

	regression_model = rg.Regressor(df_train, df_test, df_answer, label)

	# The time budget (in seconds) comes from the first command-line argument.
	regression_model.auto_sklearn(time=int(argv[1]))

	regression_model.prediction()

	regression_model.save_model('my_model')

	regression_model.save_answer('automl_answer')

	convert_to_zero('automl_answer')


if __name__ == '__main__':
	main(sys.argv)
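A hypothetical invocation, assuming the script is saved as train.py: the single positional argument is the auto-sklearn time budget in seconds.

python train.py 3600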
Example #4
        # The part of the snippet above this point presumably defined
        # y_train_clf analogously:
        y_train_clf = y_train_df['molecule'].values
        y_train_reg = y_train_df['concentration'].values
        y_test_clf = y_test_df['molecule'].values
        y_test_reg = y_test_df['concentration'].values

        fe_clf = feature_extractor_clf.FeatureExtractorClf()
        fe_clf.fit(X_train_df, y_train_clf)
        X_train_array_clf = fe_clf.transform(X_train_df)
        X_test_array_clf = fe_clf.transform(X_test_df)

        clf = classifier.Classifier()
        clf.fit(X_train_array_clf, y_train_clf)
        y_proba_clf = clf.predict_proba(X_test_array_clf)
        # `labels` is assumed to hold the class labels in the column order
        # used by predict_proba.
        y_pred_clf = labels[np.argmax(y_proba_clf, axis=1)]
        error = 1 - accuracy_score(y_test_clf, y_pred_clf)
        print('error = %s' % error)

        fe_reg = feature_extractor_reg.FeatureExtractorReg()
        # Augment the features: true molecule indicators for the training
        # set, predicted class probabilities for the test set.
        for i, label in enumerate(labels):
            X_train_df.loc[:, label] = (y_train_df['molecule'] == label)
            X_test_df.loc[:, label] = y_proba_clf[:, i]
        fe_reg.fit(X_train_df, y_train_reg)
        X_train_array_reg = fe_reg.transform(X_train_df)
        X_test_array_reg = fe_reg.transform(X_test_df)

        reg = regressor.Regressor()
        reg.fit(X_train_array_reg, y_train_reg)
        y_pred_reg = reg.predict(X_test_array_reg)
        mare = mare_score(y_test_reg, y_pred_reg)
        print('mare = ', mare)
        print('combined error = ', 2. / 3 * error + 1. / 3 * mare)
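The snippet calls a mare_score helper that is not shown. A minimal sketch, assuming it computes the mean absolute relative error (which both the name and the 2/3 error + 1/3 mare weighting suggest):

def mare_score(y_true, y_pred):
    # Assumed definition: mean absolute relative error.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true))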
Example #5
    def process(self, campaign_configuration, regression_inputs, processes_number):
        """
        Perform the actual regression

        Parameters
        ----------
        campaign_configuration: dictionary
            The set of options specified by the user though command line and campaign configuration files

        regression_inputs: RegressionInputs
            The input of the regression problem
        """
        self._logger.info("-->Generate generators")
        factory = gf.GeneratorsFactory(campaign_configuration, self._random_generator.random())
        top_generator = factory.build()
        self._logger.info("<--")
        self._logger.info("-->Generate experiments")
        expconfs = top_generator.generate_experiment_configurations([], regression_inputs)
        self._logger.info("<--")

        assert expconfs
        if processes_number == 1:
            self._logger.info("-->Run experiments (sequentially)")
            for exp in tqdm.tqdm(expconfs, dynamic_ncols=True):
                exp.train()
            self._logger.info("<--")
        else:
            self._logger.info("-->Run experiments (in parallel)")
            pool = multiprocessing.Pool(processes_number)
            expconfs = list(tqdm.tqdm(pool.imap(process_wrapper, expconfs), total=len(expconfs)))
            pool.close()
            pool.join()
            self._logger.info("<--")

        self._logger.info("-->Collecting results")
        results = re.Results(campaign_configuration, expconfs)
        results.collect_data()
        self._logger.info("<--Collected")

        for metric, mapes in results.raw_results.items():
            for experiment_configuration, mape in mapes.items():
                self._logger.debug("%s of %s is %f", metric, experiment_configuration, mape)

        best_confs, best_technique = results.get_bests()
        best_regressors = {}
        self._logger.info("-->Building the final regressors")

        # Create a shallow copy
        all_data = regression_inputs.copy()

        # Set all sets equal to whole input set
        all_data.inputs_split["training"] = all_data.inputs_split["all"]
        all_data.inputs_split["validation"] = all_data.inputs_split["all"]
        all_data.inputs_split["hp_selection"] = all_data.inputs_split["all"]

        for technique in best_confs:
            best_conf = best_confs[technique]
            # Get information about the used x_columns
            all_data.x_columns = best_conf.get_x_columns()

            if 'normalization' in campaign_configuration['DataPreparation'] and campaign_configuration['DataPreparation']['normalization']:
                # Restore non-normalized columns
                for column in all_data.scaled_columns:
                    all_data.data[column] = all_data.data["original_" + column]
                    all_data.data = all_data.data.drop(columns=["original_" + column])

                all_data.scaled_columns = []
                self._logger.debug("Denormalized inputs are:%s\n", str(all_data))

                # Normalize
                normalizer = data_preparation.normalization.Normalization(campaign_configuration)
                all_data = normalizer.process(all_data)

            # Set training set
            best_conf.set_training_data(all_data)

            # Train
            best_conf.train()
            best_conf.evaluate()
            self._logger.info("Validation MAPE on full dataset for %s: %s", technique, str(best_conf.mapes["validation"]))

            # Build the regressor
            best_regressors[technique] = regressor.Regressor(campaign_configuration, best_conf.get_regressor(), best_conf.get_x_columns(), all_data.scalers)
            pickle_file_name = os.path.join(campaign_configuration['General']['output'], ec.enum_to_configuration_label[technique] + ".pickle")
            with open(pickle_file_name, "wb") as pickle_file:
                pickle.dump(best_regressors[technique], pickle_file)
        self._logger.info("<--Built the final regressors")

        # Return the regressor
        return best_regressors[best_technique]
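For completeness, a regressor pickled by process() can be restored later with the standard pickle API; the file name below is illustrative, since the actual one depends on the configured output directory and technique label.

import pickle

with open("output/LR_RIDGE.pickle", "rb") as pickle_file:
    restored_regressor = pickle.load(pickle_file)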
Example #6
# Module to run model-agnostic meta-learning supervised regression experiments.

import torch

import experiments
import regressor
import utility

if __name__ == "__main__":
    input_arguments = utility.parse_input_arguments()

    utility.control_randomness(input_arguments.seed)
    torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Create and train baseline regressor.
    baseline_regressor = regressor.Regressor(0.01, torch_device)
    experiments.train_baseline_regressor(
        baseline_regressor, input_arguments.baseline_training_iterations,
        input_arguments.batch_size, torch_device)

    # Create and train model-agnostic meta-learning regressor.
    maml_regressor = regressor.Regressor(0.01, torch_device)
    experiments.train_maml_regressor(maml_regressor,
                                     input_arguments.meta_training_iterations,
                                     input_arguments.meta_batch_size,
                                     input_arguments.batch_size, torch_device)

    # Evaluate the trained regressors, and create and save the test result plots.
    test_results = experiments.test_regressors(baseline_regressor,
                                               maml_regressor, 100,
                                               input_arguments.batch_size,
                                               torch_device)
    # The source snippet is truncated here; any plot-saving calls that
    # followed are not shown.
Example #7
# Assumes `dataset` is a pandas DataFrame (diamonds-style data) and that
# prepro, reg, and vis are local helper modules.
import numpy as np
from sklearn.model_selection import train_test_split

x = dataset.drop(['price', 'cut', 'color', 'clarity'], axis=1)
y = dataset['price']

# Scale the numeric columns.
x = prepro.scale(x)

# Encode the categorical columns.
encode_col = dataset[['cut', 'color', 'clarity']]
encode_col = prepro.encode(encode_col)

x = np.concatenate((x, encode_col), axis=1)

X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=0, test_size=0.33)

vis.Visualizer().scatterplot(X_test[:, 0], y_test.iloc[:])

# Linear Regression
regressor = reg.Regressor(type=reg.LINEAR_REGRESSION)
regressor.fit(X_train, y_train)
print("******************Linear Regression******************")
print(regressor.score(X_test, y_test))
#vis.Visualizer().scatterplot(X_test[:, 0], y_test.iloc[:], regressor)
print("*************************************************")



# Polynomial Regression
params = dict(degree=5)
regressor = reg.Regressor(type=reg.POLY_REGRESSION, **params)
regressor.fit(X_train, y_train)
print("**************Polynomial Regression***************")
#print(regressor.score(X_test, y_test))
print("*************************************************")