def main():
    """Score the validation pairs with the saved classifier and write a submission.

    Steps, in order:
      1. read the validation pairs via ``data_io.read_valid_pairs``;
      2. build features with ``fe.feature_extractor().fit_transform``;
      3. replace non-finite feature values with ``np.nan_to_num``;
      4. save the feature matrix via ``data_io.save_valid_features``;
      5. load the classifier via ``data_io.load_model``;
      6. write one score per pair via ``data_io.write_submission``.

    The score of a pair is column 2 of ``predict_proba`` minus column 0.
    """
    print("Reading the valid pairs")
    valid = data_io.read_valid_pairs()
    features = fe.feature_extractor()

    print("Transforming features")
    trans_valid = features.fit_transform(valid)
    # NaN becomes 0 and +/-inf become large finite numbers before the
    # matrix is saved and handed to the classifier.
    trans_valid = np.nan_to_num(trans_valid)

    print("Saving Valid Features")
    data_io.save_valid_features(trans_valid)

    print("Loading the classifier")
    classifier = data_io.load_model()

    print("Making predictions")
    # NOTE(review): the result of this read is not used by the single
    # classifier path below. The call is kept so that any side effect or
    # missing-file error it produces is unchanged -- confirm and drop it.
    data_io.read_valid_info()

    orig_predictions = classifier.predict_proba(trans_valid)
    # NOTE(review): presumably columns 0 and 2 are the two directed classes
    # and column 1 the neutral one -- confirm against the trained model's
    # class ordering.
    predictions = orig_predictions[:, 2] - orig_predictions[:, 0]
    predictions = predictions.flatten()

    print("Writing predictions to file")
    data_io.write_submission(predictions)
def main():
    """Join the old feature CSVs onto the current train and valid features.

    For the training set, the current features are loaded, joined onto the
    columns read from ``./Models/old_csv/features_train_en_python.csv`` and
    saved again together with the training target. The same is then done
    for the validation set with
    ``./Models/old_csv/features_valid_en_python.csv``. The shape of the
    current features is printed before and after each join.

    Raises:
        SystemExit: with status 1 when no training feature file is loaded.
    """
    y = data_io.read_train_target()
    X = data_io.load_train_features()
    # Identity check: comparing a feature table to None with ``==`` is not
    # a reliable way to detect a missing file.
    if X is None:
        print("No feature file found!")
        raise SystemExit(1)

    X_old = data_io.load_features(
        "./Models/old_csv/features_train_en_python.csv")
    print(X.shape)
    # NOTE(review): presumably both objects are pandas DataFrames, in which
    # case the join aligns on the index and keeps the old columns first --
    # confirm against data_io.
    X = X_old.join(X)
    print(X.shape)
    data_io.save_train_features(X, y)

    # NOTE(review): unlike the training features, the validation features
    # are not checked for None before use -- confirm whether
    # load_valid_features can return None.
    X = data_io.load_valid_features()
    X_old = data_io.load_features(
        "./Models/old_csv/features_valid_en_python.csv")
    print(X.shape)
    X = X_old.join(X)
    print(X.shape)
    data_io.save_valid_features(X)
def main():
    """Join the old feature CSVs onto the current train and valid features.

    For the training set, the current features are loaded, joined onto the
    columns read from ``./Models/old_csv/features_train_en_python.csv`` and
    saved again together with the training target. The same is then done
    for the validation set with
    ``./Models/old_csv/features_valid_en_python.csv``. The shape of the
    current features is printed before and after each join.

    Raises:
        SystemExit: with status 1 when no training feature file is loaded.
    """
    y = data_io.read_train_target()
    X = data_io.load_train_features()
    # Identity check: comparing a feature table to None with ``==`` is not
    # a reliable way to detect a missing file.
    if X is None:
        print("No feature file found!")
        raise SystemExit(1)

    X_old = data_io.load_features(
        "./Models/old_csv/features_train_en_python.csv")
    print(X.shape)
    # NOTE(review): presumably both objects are pandas DataFrames, in which
    # case the join aligns on the index and keeps the old columns first --
    # confirm against data_io.
    X = X_old.join(X)
    print(X.shape)
    data_io.save_train_features(X, y)

    # NOTE(review): unlike the training features, the validation features
    # are not checked for None before use -- confirm whether
    # load_valid_features can return None.
    X = data_io.load_valid_features()
    X_old = data_io.load_features(
        "./Models/old_csv/features_valid_en_python.csv")
    print(X.shape)
    X = X_old.join(X)
    print(X.shape)
    data_io.save_valid_features(X)
def extract_valid_features():
    """Extract features for the validation pairs and save them as a table.

    The feature matrix is wrapped in a ``pd.DataFrame`` that reuses the
    index of the validation pairs and takes its column labels from the
    first element of each entry in the extractor's ``features`` list. The
    elapsed time, measured from the start of the function, is printed in
    minutes.
    """
    started_at = time.time()
    extractor = feature_extractor()
    column_names = [entry[0] for entry in extractor.features]

    print("Reading the valid pairs")
    pairs = data_io.read_valid_pairs()

    print("Extracting features")
    type_info = data_io.read_valid_info()
    # No target is passed for the validation data, hence y=None.
    matrix = extractor.fit_transform(pairs, y=None, type_map=type_info)

    seconds = float(time.time() - started_at)
    print("Features extracted in " + str(seconds / 60.0) + " Minutes")

    print("Saving features")
    frame = pd.DataFrame(matrix, index=pairs.index)
    frame.columns = column_names
    data_io.save_valid_features(frame)
def extract_valid_features():
    """Build and save the feature table for the validation pairs.

    Reads the validation pairs and their type information through
    ``data_io``, runs ``feature_extractor().fit_transform`` on them without
    a target, and saves the result as a ``pd.DataFrame``. The frame is
    indexed like the input pairs, and each column is labelled with the
    first element of the corresponding entry in the extractor's
    ``features`` list. The wall-clock duration is printed in minutes.
    """
    t0 = time.time()
    extractor = feature_extractor()
    labels = [spec[0] for spec in extractor.features]

    print("Reading the valid pairs")
    valid_pairs = data_io.read_valid_pairs()

    print("Extracting features")
    # Validation data carries no target, so y is passed as None.
    valid_types = data_io.read_valid_info()
    values = extractor.fit_transform(
        valid_pairs, y=None, type_map=valid_types)

    elapsed_seconds = float(time.time() - t0)
    elapsed_minutes = elapsed_seconds / 60.0
    print("Features extracted in " + str(elapsed_minutes) + " Minutes")

    print("Saving features")
    table = pd.DataFrame(values, index=valid_pairs.index)
    table.columns = labels
    data_io.save_valid_features(table)