Пример #1
0
def main():
    """Run the term-deposit modeling experiment and print the mean scores.

    Reads ``term-deposit-marketing-2020.csv`` and passes it to ``modeling``
    together with the three column groups, a shuffled 5-fold splitter, an
    ``SVC`` instance and a SMOTE sampler. ``modeling`` returns two score
    sequences; the mean of each is printed, in the order returned.
    """
    dataset = read_csv("term-deposit-marketing-2020.csv")

    # Column names grouped by kind; each group is handed to modeling()
    # as its own argument.
    numeric_columns = ["age", "balance", "day", "duration", "campaign"]
    binary_columns = ["default", "housing", "loan", "y"]
    categorical_columns = ["job", "marital", "education", "contact", "month"]

    # Rows labelled "no" far outnumber rows labelled "yes", so the model
    # fails to predict the "yes" rows correctly. SMOTE, an imbalanced-learning
    # technique, is used to work around that problem.
    oversampler = SMOTE(sampling_strategy='auto', k_neighbors=2, random_state=0)

    splitter = KFold(n_splits=5, shuffle=True)
    classifier = SVC()
    c = 13  # forwarded as modeling()'s second argument; its meaning is defined there
    acc_values, rc_values = modeling(
        dataset, c, splitter, classifier,
        numeric_columns, binary_columns, categorical_columns, oversampler,
    )

    for values in (acc_values, rc_values):
        print(statistics.mean(values))
Пример #2
0
from preprocess import get_data, get_permutations, read_csv
from type_1_model import build_model
from type_1_loader import DataGenerator, get_valid

# Location template for the type-1 training set; '{}' is the file-name slot.
path = '/home/ubuntu/dataset/type1_train/{}'

# The label file is split by read_csv into a training and a validation part.
train_data, valid_data = read_csv(path.format('type1_train.csv'))

# Both helpers receive the unformatted template itself, not a formatted path.
training_generator = DataGenerator(train_data, path)
x_valid, y_valid = get_valid(valid_data, path)

model = build_model()
model.summary()

# Train from the generator for three epochs, validating on the fixed pair.
model.fit_generator(
    training_generator,
    validation_data=(x_valid, y_valid),
    epochs=3,
    workers=1,
    max_queue_size=10,
)
        super().train(features, targets)
        self.model.fit(X=features, y=targets)

    def accuracy_score(self, features, targets):
        """Return the wrapped model's score, weighted by its class weights.

        ``self.model.class_weight`` describes classes, whereas the third
        argument of ``score`` is expected to hold one weight per sample
        (assuming a scikit-learn style estimator -- TODO confirm). The setting
        is therefore expanded to per-sample weights before scoring instead of
        being passed through as-is.

        Args:
            features: Feature rows to score.
            targets: True label for each row of ``features``.

        Returns:
            The value returned by ``self.model.score`` for the weighted
            samples (unweighted when ``class_weight`` is ``None``).
        """
        from collections import Counter

        super().accuracy_score(features, targets)

        class_weight = self.model.class_weight
        if isinstance(class_weight, dict):
            # Explicit mapping: each sample takes the weight of its class;
            # classes missing from the mapping weigh 1.
            sample_weight = [class_weight.get(label, 1.0) for label in targets]
        elif isinstance(class_weight, str) and class_weight in (
                'balanced', 'balanced_subsample'):
            # "balanced" presets: n_samples / (n_classes * count_of_class).
            labels = list(targets)
            if labels:
                counts = Counter(labels)
                scale = len(labels) / len(counts)
                sample_weight = [scale / counts[label] for label in labels]
            else:
                sample_weight = None
        else:
            # None (unweighted) or a form not handled here: forwarded
            # unchanged, exactly as before.
            sample_weight = class_weight

        return self.model.score(features, targets, sample_weight)

    def abs_errors(self, features, targets):
        """Return the absolute difference between predictions and ``targets``.

        Args:
            features: Input passed to ``self.predict``.
            targets: Reference values subtracted from the predictions.

        Returns:
            ``abs(self.predict(features) - targets)``.
        """
        return abs(self.predict(features) - targets)


if __name__ == '__main__':
    # Columns removed from each table to leave only the model features.
    non_feature_columns = ['outcome', 'date', 'team1', 'team2']

    # Training table: 'outcome' is the target, the remaining columns are inputs.
    train_df = read_csv('../data/train_dataset.csv')
    train_y = train_df.outcome
    train_x = train_df.drop(columns=non_feature_columns)

    # Matches to predict, split the same way (not used further below).
    test_df = read_csv('../data/matches_to_predict.csv')
    test_y = test_df.outcome
    test_x = test_df.drop(columns=non_feature_columns)

    # Fit a fresh model and write it to the checkpoint file.
    model = RandomForestModel(min_samples_split=5, n_estimators=500)
    model.train(train_x, train_y)
    model.save_model('../ckpts/random_forest_model.ckpt')

    # Alternative flow, kept for reference -- reload the checkpoint and score it:
    #   model = RandomForestModel(n_estimators=100)
    #   model.load_model('../ckpts/random_forest_model.ckpt')
    #   score = model.accuracy_score(train_x, train_y)
Пример #4
0
"""
The main training script.

Written by Tanmay Patil
"""
import matplotlib.pyplot as plt
from model import create_model, train_model
from preprocess import object_to_date_time, get_daily_weather_data, normalize_data, read_csv, get_training_data



def epoch_numbers(losses):
    """Return the 1-based epoch numbers ``[1, ..., len(losses)]``.

    Args:
        losses: Sequence holding one loss value per epoch.

    Returns:
        A list of consecutive integers starting at 1, one per entry of
        ``losses``; an empty list when ``losses`` is empty.
    """
    return list(range(1, len(losses) + 1))


if __name__ == "__main__":
    # Load the processed CSV and run it through the preprocessing helpers.
    df = read_csv("../data/processed/delhi_weather_data_processed.csv")
    df = object_to_date_time(df)
    daily_weather = get_daily_weather_data(df)
    daily_weather = normalize_data(daily_weather)
    X, y = get_training_data(daily_weather)

    # The first 7300 samples are used for training; the remainder is held
    # out (the held-out part is not used further in this script).
    X_train = X[:7300, ::]
    X_test = X[7300:, ::]
    y_train = y[:7300]
    y_test = y[7300:]

    model = create_model()
    history = train_model(model, X_train, y_train)

    # Plot the per-epoch training loss against 1-based epoch numbers.
    # The length is taken first and then incremented: adding 1 to the loss
    # list itself raises TypeError.
    train_loss = history.history['loss']
    x_axis = epoch_numbers(train_loss)
    plt.title('Training Loss')
    plt.plot(x_axis, train_loss)
    plt.show()
Пример #5
0
if __name__ == '__main__':
    # All working directories live under ./data, relative to the current
    # working directory.
    data_root = os.path.join(os.getcwd(), 'data')
    ckpt_dir = os.path.join(data_root, 'ckpts')
    input_dir = os.path.join(data_root, 'input')
    preprocess_dir = os.path.join(data_root, 'preprocess')
    simulate_dir = os.path.join(data_root, 'simulate')

    # exist_ok=True creates whatever is missing without a separate existence
    # check, so there is no window between checking and creating.
    for directory in (ckpt_dir, input_dir, preprocess_dir, simulate_dir):
        os.makedirs(directory, exist_ok=True)

    # Training table: 'outcome' is the target, the remaining columns are inputs.
    train_df = read_csv(os.path.join(preprocess_dir, 'train_dataset.csv'))
    train_y = train_df.outcome
    train_x = train_df.drop(columns=['outcome', 'date', 'team1', 'team2'])

    # Matches to predict, split the same way (not used further below).
    test_df = read_csv(os.path.join(preprocess_dir, 'matches_to_predict.csv'))
    test_y = test_df.outcome
    test_x = test_df.drop(columns=['outcome', 'date', 'team1', 'team2'])

    # Fit a fresh model and write it to the checkpoint directory.
    model = RandomForestModel(n_estimators=500, min_samples_split=5)
    model.train(train_x, train_y)
    model.save_model(os.path.join(ckpt_dir, 'random_forest_model.ckpt'))

    # Alternative flow, kept for reference -- reload the checkpoint and score it:
    #   model = RandomForestModel(n_estimators=100)
    #   model.load_model(os.path.join(ckpt_dir, 'random_forest_model.ckpt'))
    #   score = model.accuracy_score(train_x, train_y)