def main():
    """Run the term-deposit experiment and print the mean of each score list.

    Loads ``term-deposit-marketing-2020.csv``, declares which columns are
    numeric, binary and categorical, builds a SMOTE resampler, a shuffled
    5-fold splitter and an SVC, and hands everything to ``modeling``.  The
    two score sequences it returns (presumably accuracy and recall, judging
    by their names) are averaged and printed, one value per line.
    """
    frame = read_csv("term-deposit-marketing-2020.csv")

    # Column groups forwarded to ``modeling``.
    numeric_columns = ["age", "balance", "day", "duration", "campaign"]
    binary_columns = ["default", "housing", "loan", "y"]
    categorical_columns = ["job", "marital", "education", "contact", "month"]

    # English rendering of the original note kept verbatim below: samples
    # labelled "no" far outnumber those labelled "yes", so the model cannot
    # predict the "yes" class correctly; SMOTE, an imbalanced-learning
    # technique, is used to work around that.
    '''no etiketli veri sayısı, yes etiketli veri sayısından çok fazla olduğu için model yes etiketli verileri doğru tahmin edemiyor. Bu problemi aşmak için imbalanced learning yöntemlerinden smote kullanıldı.'''
    oversampler = SMOTE(sampling_strategy='auto', k_neighbors=2, random_state=0)

    # NOTE(review): the splitter shuffles without a fixed random_state, so
    # fold membership differs between runs.
    splitter = KFold(n_splits=5, shuffle=True)
    classifier = SVC()
    c = 13  # forwarded positionally to ``modeling``; meaning not visible here

    acc_scores, rc_scores = modeling(
        frame,
        c,
        splitter,
        classifier,
        numeric_columns,
        binary_columns,
        categorical_columns,
        oversampler,
    )

    for scores in (acc_scores, rc_scores):
        print(statistics.mean(scores))
from preprocess import get_data, get_permutations, read_csv
from type_1_model import build_model
from type_1_loader import DataGenerator, get_valid

# Template for every file of the type-1 training set; ``{}`` is filled with a
# file name.  The same template is also handed to the loader helpers.
path = '/home/ubuntu/dataset/type1_train/{}'
# path = '/Users/lvyufeng/Documents/captcha_train_set/type1_train/{type1_train_19990.jpg}'

# ``read_csv`` returns the train/validation split of the label file.
labels_csv = path.format('type1_train.csv')
train_data, valid_data = read_csv(labels_csv)

# Training samples are fed through a generator; validation data is
# materialised up front as an (inputs, targets) pair.
training_generator = DataGenerator(train_data, path)
x_valid, y_valid = get_valid(valid_data, path)

model = build_model()
model.summary()

# NOTE(review): ``fit_generator`` is deprecated in recent Keras releases in
# favour of ``fit`` -- confirm the installed version before changing it.
model.fit_generator(
    training_generator,
    epochs=3,
    validation_data=(x_valid, y_valid),
    max_queue_size=10,
    workers=1,
)
# NOTE(review): the two statements below are the tail of a method whose
# ``def`` line (and the enclosing class header) precede the visible source.
# They are reproduced unchanged.
super().train(features, targets)
self.model.fit(X=features, y=targets)


def accuracy_score(self, features, targets):
    """Return the wrapped model's score on ``features`` / ``targets``.

    The parent implementation is invoked first and its return value is
    discarded; the value returned is ``self.model.score(features, targets)``.

    The previous code passed ``self.model.class_weight`` as a third
    positional argument.  Assuming ``self.model`` is a scikit-learn style
    estimator, that slot is ``sample_weight`` (one weight per sample), not a
    class-weight specification.  The call is unchanged when ``class_weight``
    is ``None`` and no longer misuses the argument otherwise.
    """
    super().accuracy_score(features, targets)
    return self.model.score(features, targets)


def abs_errors(self, features, targets):
    """Return ``abs(prediction - target)`` for the given features.

    Predictions come from ``self.predict(features)``; the subtraction and
    ``abs`` are applied to whatever type that call and ``targets`` provide.
    """
    targets_pred = self.predict(features)
    return abs(targets_pred - targets)


if __name__ == '__main__':
    # Columns removed from both data sets before they are used as features.
    non_feature_columns = ['outcome', 'date', 'team1', 'team2']

    train_df = read_csv('../data/train_dataset.csv')
    train_y = train_df.outcome
    train_x = train_df.drop(columns=non_feature_columns)

    # Loaded and split the same way; only referenced by the commented-out
    # evaluation code below.
    test_df = read_csv('../data/matches_to_predict.csv')
    test_y = test_df.outcome
    test_x = test_df.drop(columns=non_feature_columns)

    model = RandomForestModel(n_estimators=500, min_samples_split=5)
    model.train(train_x, train_y)
    model.save_model('../ckpts/random_forest_model.ckpt')

    # model = RandomForestModel(n_estimators=100)
    # model.load_model('../ckpts/random_forest_model.ckpt')
    # score = model.accuracy_score(train_x, train_y)
""" The main training script. Written by Tanmay Patil """
import matplotlib.pyplot as plt

from model import create_model, train_model
from preprocess import object_to_date_time, get_daily_weather_data, normalize_data, read_csv, get_training_data

if __name__ == "__main__":
    # Load the processed data and run it through the preprocess helpers in
    # the order the training data builder expects.
    df = read_csv("../data/processed/delhi_weather_data_processed.csv")
    df = object_to_date_time(df)
    daily_weather = get_daily_weather_data(df)
    daily_weather = normalize_data(daily_weather)
    X, y = get_training_data(daily_weather)

    # The first 7300 samples are used for training; the remainder is held
    # out and not used further in this script.
    split_index = 7300
    X_train = X[:split_index, ::]
    X_test = X[split_index:, ::]
    y_train = y[:split_index]
    y_test = y[split_index:]

    model = create_model()
    history = train_model(model, X_train, y_train)

    # Loss values recorded during training (presumably one per epoch).
    train_loss = history.history['loss']

    # 1-based x positions, one per recorded loss value.  The original wrote
    # ``len(train_loss + 1)``, which adds an int to the loss list and raises
    # TypeError before anything is plotted.
    x_axis = list(range(1, len(train_loss) + 1))

    plt.title('Training Loss')
    plt.plot(x_axis, train_loss)
    plt.show()
if __name__ == '__main__':
    # Every working directory lives under ``<cwd>/data``.
    data_root = os.path.join(os.getcwd(), 'data')
    ckpt_dir = os.path.join(data_root, 'ckpts')
    input_dir = os.path.join(data_root, 'input')
    preprocess_dir = os.path.join(data_root, 'preprocess')
    simulate_dir = os.path.join(data_root, 'simulate')

    # ``exist_ok=True`` replaces the separate exists-then-create check, so a
    # directory that appears between the two steps no longer causes an error.
    for directory in (ckpt_dir, input_dir, preprocess_dir, simulate_dir):
        os.makedirs(directory, exist_ok=True)

    # Columns removed from both data sets before they are used as features.
    non_feature_columns = ['outcome', 'date', 'team1', 'team2']

    train_df = read_csv(os.path.join(preprocess_dir, 'train_dataset.csv'))
    train_y = train_df.outcome
    train_x = train_df.drop(columns=non_feature_columns)

    # Loaded and split the same way; only referenced by the commented-out
    # evaluation code below.
    test_df = read_csv(os.path.join(preprocess_dir, 'matches_to_predict.csv'))
    test_y = test_df.outcome
    test_x = test_df.drop(columns=non_feature_columns)

    model = RandomForestModel(n_estimators=500, min_samples_split=5)
    model.train(train_x, train_y)
    model.save_model(os.path.join(ckpt_dir, 'random_forest_model.ckpt'))

    # model = RandomForestModel(n_estimators=100)
    # model.load_model(os.path.join(ckpt_dir, 'random_forest_model.ckpt'))
    # score = model.accuracy_score(train_x, train_y)