def _is_null_sentinel(features):
    '''
    Returns True only when ``features`` is the literal string 'null', the
    value that ends the prediction loop in ``fit_predict_only``.
    '''
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str  # Python 3 has no basestring
    # Checking the type first keeps the comparison scalar. If the feature
    # reader hands back an array-like (its return type is not visible here),
    # comparing it directly against a string inside a loop condition is
    # fragile.
    return isinstance(features, string_types) and features == 'null'


def fit_predict_only(config, x_test=None, y_test=None,
                     model_path='filename.pkl'):
    '''
    Loads a previously stored estimator and predicts feature rows one at a
    time, writing each prediction to stdout.

    The first row is ``x_test``; every following row is read as one line
    from stdin and parsed with ``read_features_test(line, '\\t')``. The loop
    ends when the parsed features are the string 'null' or when stdin is
    exhausted.

    @param config: configuration mapping; only the optional "learning"
    section (and its "scorer" entry) is read here.
    @param x_test: features of the first row to predict. Nothing is
    predicted when it is None.
    @param y_test: accepted for interface compatibility; currently unused
    because the evaluation of predictions is disabled.
    @param model_path: path of the stored estimator. Defaults to
    'filename.pkl', the path that used to be hard-coded.
    @return: None. Predictions are only written to stdout.
    '''
    # NOTE(review): joblib.load is pickle-based, so loading a file can run
    # arbitrary code. Only point model_path at files you trust.
    estimator = joblib.load(model_path)

    # A missing "learning" section falls back to the default scorers instead
    # of failing with an AttributeError on None.
    learning_cfg = config.get("learning", None) or {}

    # The resolved scorers are not used while evaluation is disabled. The
    # call is kept so that any error it raises for the configured scorer
    # names still surfaces (helper behaviour is not visible here).
    set_scorer_functions(learning_cfg.get("scorer", ['mae', 'rmse']))

    log.debug(x_test)
    if x_test is None:
        return

    while not _is_null_sentinel(x_test):
        log.debug("Predicting unseen data using the trained model..."
                  "(from previously learned model)")
        y_hat = estimator.predict(x_test)

        # Flush after every row so a consumer reading our stdout sees each
        # prediction immediately.
        sys.stdout.write(str(y_hat) + '\n')
        sys.stdout.flush()

        log.debug("Evaluating prediction on the test set...")

        try:
            n_input = raw_input()
        except EOFError:
            # stdin was closed: there is nothing left to predict, so stop
            # cleanly instead of crashing.
            break
        x_test = read_features_test(n_input, '\t')
def _require_option(config, key, message):
    '''
    Returns the value of ``key`` in ``config``; raises ``Exception(message)``
    when the option is missing or empty.
    '''
    value = config.get(key, None)
    if not value:
        raise Exception(message)
    return value


def run(config):
    '''
    Runs the main code of the program. Checks for mandatory parameters, opens
    input files and performs the learning steps.

    Options read from ``config``:
      - "x_train", "y_train", "learning": mandatory.
      - "x_test", "y_test", "labels": optional, default None.
      - "separator": optional, defaults to DEFAULT_SEP.
      - "scale": optional, defaults to True.
      - "predict_only": optional, defaults to False. When set, the first
        feature row is read from stdin and handed to ``fit_predict_only``
        instead of training with ``fit_predict``.

    @param config: configuration mapping exposing ``get(key, default)``.
    @raise Exception: if one of the mandatory options is missing or empty.
    '''
    # check if the mandatory parameters are set in the config file.
    # The messages are built from adjacent string literals: a backslash
    # continuation inside a literal would leak the continuation line's
    # leading whitespace into the message.
    x_train_path = _require_option(
        config, "x_train",
        "'x_train' option not found in the configuration file. "
        "The training dataset is mandatory.")

    y_train_path = _require_option(
        config, "y_train",
        "'y_train' option not found in the configuration file. "
        "The training dataset is mandatory.")

    _require_option(
        config, "learning",
        "'learning' option not found. At least one "
        "learning method must be set.")

    # checks for the optional parameters
    x_test_path = config.get("x_test", None)
    y_test_path = config.get("y_test", None)

    separator = config.get("separator", DEFAULT_SEP)
    labels_path = config.get("labels", None)
    scale = config.get("scale", True)

    log.debug("Opening input files ...")
    log.debug("X_train: %s" % x_train_path)
    log.debug("y_train: %s" % y_train_path)
    log.debug("X_test: %s" % x_test_path)
    log.debug("y_test_path: %s" % y_test_path)

    # open feature and response files
    X_train, y_train, X_test, y_test, labels = \
        open_datasets(x_train_path, y_train_path, x_test_path,
                      y_test_path, separator, labels_path)

    if scale:
        # preprocess and execute mean removal
        X_train, X_test = scale_datasets(X_train, X_test)

    predict_only = config.get("predict_only", False)
    if predict_only:
        # The first feature row comes from stdin (tab separated) and replaces
        # the test set loaded above. It is passed on exactly as parsed and
        # does not go through scale_datasets.
        # NOTE(review): confirm the stored model expects unscaled input.
        line = raw_input()
        X_test = read_features_test(line, '\t')
        # NOTE(review): 2 looks like a placeholder, since no reference
        # values are read in this mode -- confirm that
        # fit_predict_only does not depend on it.
        y_test = 2
        log.debug(X_test)
        y_hat = fit_predict_only(config, X_test, y_test)
    else:
        # fits training data and predicts the test set using the trained model
        y_hat = fit_predict(config, X_train, y_train, X_test, y_test)