Пример #1
0
def main(opt):
    """Train, ensemble, or evaluate the word-prediction model.

    Modes (``opt.mode``):
      * ``"train"``    -- fit a fresh model and save it to ``opt.saved_model``.
      * ``"ensemble"`` -- load two saved models and score their combined
        predictions on ``opt.input``.
      * anything else  -- load ``opt.saved_model`` and predict/score.

    Relies on module-level helpers (load_data, build_model, TestCallback,
    predict_final_word*, make_submission, scoring) defined elsewhere.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu
    if opt.mode == "train":
        st = time.time()
        print('Loading data')
        x_train, y_train, x_valid, y_valid, vocabulary_size = load_data(
            "data", opt.debug)

        num_training_data = x_train.shape[0]
        sequence_length = x_train.shape[1]
        print(num_training_data)

        print('Vocab Size', vocabulary_size)

        model = build_model(opt.embedding_dim, opt.hidden_size, opt.drop,
                            sequence_length, vocabulary_size)
        print("Training Model...")  # fixed typo ("Traning")
        model.fit(
            x_train,
            y_train,
            batch_size=opt.batch_size,
            epochs=opt.epochs,
            verbose=1,
            callbacks=[TestCallback((x_valid, y_valid), model=model)])
        model.save(opt.saved_model)
        print("Training cost time: ", time.time() - st)

    elif opt.mode == "ensemble":
        # Suffix every layer name so the two loaded graphs can coexist
        # without layer-name collisions downstream.
        models = []
        for idx, path in enumerate((opt.saved_model1, opt.saved_model2), 1):
            member = load_model(path)
            # NOTE(review): assigning ``.name`` only works on older Keras
            # versions -- confirm against the pinned dependency.
            member.name = 'model%d' % idx
            for layer in member.layers:
                layer.name = layer.name + "_%d" % idx
            models.append(member)

        # Use a context manager so the vocab file handle is closed.
        with open(os.path.join("data", "vocab.json")) as f:
            vocabulary = json.load(f)
        predict_dict = predict_final_word_models(models, vocabulary, opt.input)
        sub_file = make_submission(predict_dict, opt.student_id, opt.input)
        if opt.score:
            scoring(sub_file, os.path.join("data"), type="valid")

    else:
        model = load_model(opt.saved_model)
        with open(os.path.join("data", "vocab.json")) as f:
            vocabulary = json.load(f)
        predict_dict = predict_final_word(model, vocabulary, opt.input)
        sub_file = make_submission(predict_dict, opt.student_id, opt.input)
        if opt.score:
            scoring(sub_file, os.path.join("data"), type="valid")
Пример #2
0
def main(opt):
    """Train the model, score a saved model on the validation set, or
    score a fixed ten-model ensemble, depending on ``opt.mode``.

    Relies on module-level helpers (load_data, build_model, TestCallback,
    predict_final_word, make_submission, scoring) defined elsewhere.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu
    if opt.mode == "train":
        st = time.time()
        print('Loading data')
        x_train, y_train, x_valid, y_valid, vocabulary_size = load_data(
            "data", opt.debug)

        num_training_data = x_train.shape[0]
        sequence_length = x_train.shape[1]
        print(num_training_data)

        print('Vocab Size', vocabulary_size)

        model = build_model(opt.embedding_dim, opt.hidden_size, opt.drop1,
                            opt.drop2, sequence_length, vocabulary_size)
        print("Training Model...")
        model.fit(x_train,
                  y_train,
                  batch_size=opt.batch_size,
                  epochs=opt.epochs,
                  verbose=2,
                  callbacks=[TestCallback((x_valid, y_valid), model=model)])
        model.save(opt.saved_model)
        print("Training cost time: ", time.time() - st)
    elif opt.mode == "score_valid":  # flattened the nested ``else: if``
        model = load_model(opt.saved_model)
        # Context manager so the vocab file handle is closed.
        with open(os.path.join("data", "vocab.json")) as f:
            vocabulary = json.load(f)
        predict_dict = predict_final_word([model], vocabulary, opt.input)
        sub_file = make_submission(predict_dict, opt.student_id, opt.input)
        scoring(sub_file, os.path.join("data"), type="valid")
    else:
        # Load the ten ensemble members in a loop instead of ten
        # copy-pasted load_model lines.
        model_list = [load_model('models/model%d.h5' % i) for i in range(10)]
        with open(os.path.join("data", "vocab.json")) as f:
            vocabulary = json.load(f)
        predict_dict = predict_final_word(model_list, vocabulary,
                                          opt.input)
        sub_file = make_submission(predict_dict, opt.student_id, opt.input)
        scoring(sub_file, os.path.join("data"), type="valid")
Пример #3
0
def main(opt):
    """Train the model (mode ``"train"``) or load ``opt.saved_model`` and
    predict/score the final-word task (any other mode).

    Relies on module-level helpers (load_data, build_model, TestCallback,
    predict_final_word, make_submission, scoring) defined elsewhere.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu
    if opt.mode == "train":
        st = time.time()
        print('Loading data')
        x_train, y_train, x_valid, y_valid, vocabulary_size = load_data(
            "data", opt.debug)

        num_training_data = x_train.shape[0]
        sequence_length = x_train.shape[1]
        print(num_training_data)

        print('Vocab Size', vocabulary_size)

        model = build_model(opt.embedding_dim, opt.hidden_size, opt.drop,
                            sequence_length, vocabulary_size, opt.optimizer)
        print("Training Model...")  # fixed typo ("Traning")
        model.fit(
            x_train,
            y_train,
            batch_size=opt.batch_size,
            epochs=opt.epochs,
            verbose=1,
            callbacks=[TestCallback((x_valid, y_valid), model=model)])
        model.save(opt.saved_model)
        print("Training cost time: ", time.time() - st)

    else:
        model = load_model(opt.saved_model)
        # Context manager so the vocab file handle is closed.
        with open(os.path.join("data", "vocab.json")) as f:
            vocabulary = json.load(f)
        predict_dict = predict_final_word(model, vocabulary, opt.input)
        sub_file = make_submission(predict_dict, opt.student_id, opt.input)
        if opt.score:
            scoring(sub_file, os.path.join("data"), type="valid")
Пример #4
0
def main():
    """Full training pipeline: load data, build CV folds, drop rows,
    train the NN, write the submission, and optionally submit to Kaggle.

    Relies on module-level state (``cfg``, ``logger_path``, ``run_name``,
    ``comment``, ``const``, ``dh``, ``factory``) defined elsewhere in
    the file.
    """
    t = Timer()
    seed_everything(cfg.common.seed)

    logger_path.mkdir(exist_ok=True)
    logging.basicConfig(filename=logger_path / 'train.log',
                        level=logging.DEBUG)

    # Persist the resolved config next to the logs for reproducibility.
    dh.save(logger_path / 'config.yml', cfg)

    with t.timer('load data'):
        train_df = pd.read_csv(const.TRAIN_PATH)
        test_df = pd.read_csv(const.TEST_PATH)

    with t.timer('make folds'):
        fold_df = factory.get_fold(cfg.validation, train_df)
        if cfg.validation.single:
            # Single-fold validation: keep fold_0 only; presumably the
            # division normalises the fold column into a 0/1 mask --
            # TODO confirm against factory.get_fold's output.
            fold_df = fold_df[['fold_0']]
            fold_df /= fold_df['fold_0'].max()

    with t.timer('drop index'):
        # Optionally drop configured "bad" rows from both the data and
        # the fold assignment, keeping them aligned.
        if cfg.common.drop is not None:
            drop_idx = factory.get_drop_idx(cfg.common.drop)
            train_df = train_df.drop(drop_idx, axis=0).reset_index(drop=True)
            fold_df = fold_df.drop(drop_idx, axis=0).reset_index(drop=True)

    with t.timer('train model'):
        trainer = NNTrainer(run_name, fold_df, cfg)
        cv = trainer.train(train_df=train_df,
                           target_df=train_df[const.TARGET_COL])
        preds = trainer.predict(test_df)
        trainer.save()

        # Embed the CV score in the log directory name, then silence
        # further logging output.
        run_name_cv = f'{run_name}_{cv:.3f}'
        logger_path.rename(f'../logs/{run_name_cv}')
        logging.disable(logging.FATAL)

    with t.timer('make submission'):
        make_submission(run_name=run_name_cv,
                        y_pred=preds,
                        target_name='Label',
                        comp=False)
        if cfg.common.kaggle.submit:
            kaggle = Kaggle(cfg.compe.name, run_name_cv)
            kaggle.submit(comment)
Пример #5
0
def main():
    """Run a 5-fold NN cross-validation experiment end to end and write
    the gzip'd test-set submission.

    Relies on module-level helpers/state (``now``, ``copy_script``,
    ``Logger``, ``load_fs_tosh``, ``conf``, ``NN_cv``,
    ``make_submission``) defined elsewhere in the file.
    """
    experiment_name = now()
    cv_path = Path(f"result/{experiment_name}")
    cv_path.mkdir(parents=True)

    # Snapshot the experiment script alongside its results.
    copy_script(cv_path)
    log = Logger(experiment_name, cv_path / "exp.log")

    log.info("load data")
    with log.interval_timer("load data"):
        train_X = load_fs_tosh('all_snap', conf)
        train_y = feather.read_dataframe("features/HasDetections.ftr")
        train_y = train_y.HasDetections
        test = load_fs_tosh('all_snap', conf, test=True)

    log.info(pformat(list(train_X.columns)))
    # NOTE(review): random_state has no effect on StratifiedKFold unless
    # shuffle=True is also passed (newer sklearn raises an error for this
    # combination) -- confirm the intended fold behaviour.
    cv = StratifiedKFold(n_splits=5, random_state=conf.seed)
    cv = cv.split(train_X, train_y)

    log.info("learning start")
    log.double_kiritori()
    # Embedding configuration produced by the feature-generation step.
    with open('features/NN/conf_tosh_all_snap.pkl', 'rb') as p:
        embedd_conf = pickle.load(p)
    log.info(pformat(embedd_conf))
    score, pred, meta = NN_cv(train_X,
                              train_y,
                              cv,
                              log,
                              cv_path,
                              X_test=test,
                              split_conf=embedd_conf)
    log.info(score)
    log.double_kiritori()
    log.info("done")

    # Release the large training frames before saving predictions.
    del train_X, train_y

    np.save(cv_path / "test_preds.npy", pred)  # test-set predictions
    np.save(cv_path / "oof_preds.npy", meta)   # out-of-fold predictions

    make_submission(pred, f"submissions/{experiment_name}.csv.gz")
Пример #6
0
def main(opt):
    """Train the model (mode ``"train"``) or evaluate a saved one that
    contains the custom ``LayerNormalization`` layer (any other mode).

    Relies on module-level helpers (load_data, build_model, TestCallback,
    predict_final_word, make_submission, scoring) defined elsewhere.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu
    if opt.mode == "train":
        st = time.time()
        print('Loading data')
        x_train, y_train, x_valid, y_valid, vocabulary_size = load_data(
            "data", opt.debug)

        num_training_data = x_train.shape[0]
        sequence_length = x_train.shape[1]
        print(num_training_data)

        print('Vocab Size', vocabulary_size)

        model = build_model(opt.embedding_dim, opt.hidden_size, opt.drop,
                            sequence_length, vocabulary_size)
        print("Training Model...")  # fixed typo ("Traning")
        model.fit(
            x_train,
            y_train,
            batch_size=opt.batch_size,
            epochs=opt.epochs,
            verbose=1,
            callbacks=[TestCallback((x_valid, y_valid), model=model)])
        model.save(opt.saved_model)
        print("Training cost time: ", time.time() - st)

    else:
        # The saved model uses a custom layer, so load_model must be told
        # about it explicitly.
        model = load_model(
            opt.saved_model,
            custom_objects={'LayerNormalization': LayerNormalization})
        # Context manager so the vocab file handle is closed.
        with open(os.path.join("data", "vocab.json")) as f:
            vocabulary = json.load(f)
        predict_dict = predict_final_word(model, vocabulary, opt.input)
        sub_file = make_submission(predict_dict, opt.student_id, opt.input)
        if opt.score:
            scoring(sub_file, os.path.join("data"), type="valid")
Пример #7
0
    X_fold = np.hstack((X_fold, X_pred))

    all_X.append(X_fold)
    all_y.append(y_fold)
    all_w.append(w_fold)

X = np.vstack(all_X)
y = np.concatenate(all_y)
w = np.concatenate(all_w)

clf = Classifier(**params)
w = rescale(w)
w = rebalance(y, w)

try:
    clf.fit(X, y, sample_weight=w)
except:
    clf.fit(X, y)


# And make a submussion
print "Making submission..."
X_test, _, _, _ = load_test()
X_pred = load_predictions("stack/*-test.npy")
X_test = np.hstack((X_test, X_pred))

make_submission(clf, threshold, "output-stacking.csv", X_test=X_test)

import IPython; IPython.embed()

Пример #8
0
    "bootstrap": False,
    "max_features": 27
}


# Train on the whole training set
def train(Classifier, params, X, y, w, verbose=1):
    """Fit a fresh ``Classifier(**params)`` on the full training set.

    The sample weights ``w`` are rescaled and rebalanced against the
    labels ``y`` before fitting.  Returns the fitted classifier.
    ``verbose > 0`` prints start/end markers.
    """
    if verbose > 0:
        print "[Start]"

    # Normalise the weights, then rebalance them across the classes in y.
    w = rescale(w)
    w = rebalance(y, w)

    clf = Classifier(**params)
    clf.fit(X, y, sample_weight=w)

    if verbose > 0:
        print "[End]"

    return clf


# Fit the final classifier on the full training set.
clf = train(Classifier, params, X, y, w)

# Make submission
# NOTE(review): hard-coded decision threshold -- presumably tuned on a
# prior validation run; confirm before reuse.
threshold = -2.74420523643
make_submission(clf, threshold, "output-rs.csv")

# Drop into an interactive shell for post-hoc inspection.
import IPython
IPython.embed()
Пример #9
0
def main(args):
    """Train (and evaluate) a GRU-based model for classifying toxic content in
    wikipedia comments. Takes a preprocessed (cleaned, tokenized, and padded)
    comments as input and outputs the probability of six different types of toxicity
    being contained in the comment. Execution is modified by a number of call
    arguments, described below.

    Parameters
    ----------
    --train (-t) : (Re)train the model. Leave this out if only doing inference or
        only evaluating on test set.
    --auxilliary_input (-a) : Use auxilliary input to the model for training and
        testing. Auxilliary input consists of class probabilities calculated using
        ridge regression. Requires that said auxilliary input is already generate
        for a given input sentence.
    --combine_data (-c) : Combine training and test data with additional figshare
        comments when fitting tokenizer to data.
    --submit (-s) : Turn test predictions into a submission for Kaggle.
    --visualise (-v) : Visualise attention activations for a sentence.
    --fasttext (-f) : Use word embeddings trained using fasttext instead of
        pre-trained GloVe embeddings.
    """

    # Execution flags taken straight from the CLI arguments.
    TRAIN = args.train
    USE_AUXILLIARY_INPUT = args.auxilliary_input
    COMBINE_DATA = args.combine_data
    MAKE_SUBMISSION = args.submit
    VISUALISE_FULL_ATTENTION = args.visualise
    USE_FASTTEXT = args.fasttext

    # Tokenizer / sequence limits (None = no vocabulary-size cap).
    MAX_NUM_WORDS = None
    MAX_LENGTH = 150
    EMBEDDING_DIM = 300
    SKIPGRAM = True

    # Training schedule; SENTENCE_NUM selects the sample sentence used
    # for the attention visualisation further down.
    MAX_EPOCHS = 50
    BATCH_SIZE = 512
    VAL_SPLIT = 0.2
    SENTENCE_NUM = 51

    # Per-class probability above which a class counts as "present".
    TOXICITY_THRESHOLD = 0.6

    AVERAGE_ATTENTION = False

    # Cyclical learning-rate settings and run-specific output paths
    # (timestamped so repeated runs do not overwrite each other).
    BASE_LR = 0.0001
    MAX_LR = 0.005
    STEP_SIZE = 30000
    CLR_MODE = 'triangular'
    now = datetime.datetime.now()
    now = now.strftime('%Y%m%d%H%M')
    LOG_PATH = './logs/' + now
    WEIGHT_SAVE_PATH = 'weights_base.best.hdf5'
    SUBMISSION_SAVE_PATH = './submissions/submission_' + now + '.csv'
    ES_PATIENCE = 6
    TB_HIST_FREQ = 0
    TB_WRITE_GRAPH = True

    # Parameter bundles for the four callbacks built below.
    clr_params = {
        'base_lr': BASE_LR,
        'max_lr': MAX_LR,
        'step_size': STEP_SIZE,
        'mode': CLR_MODE
    }
    ckpt_params = {
        'filepath': WEIGHT_SAVE_PATH,
        'verbose': 1,
        'save_best_only': True,
        'save_weights_only': True
    }
    es_params = {'patience': ES_PATIENCE}
    tb_params = {
        'log_dir': LOG_PATH,
        'histogram_freq': TB_HIST_FREQ,
        'write_graph': TB_WRITE_GRAPH,
        'batch_size': BATCH_SIZE,
        # embeddings_freq > MAX_EPOCHS effectively disables embedding dumps.
        'embeddings_freq': MAX_EPOCHS + 1
    }

    callbacks = get_callbacks(clr_params, ckpt_params, es_params, tb_params)

    CLASS_LIST = [
        'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'
    ]

    # Load/tokenize the data; the auxiliary variant additionally returns
    # the ridge-regression class probabilities for train and test.
    txt_prep = TextPreprocessor(max_nb_words=MAX_NUM_WORDS,
                                max_padding_length=MAX_LENGTH,
                                combine_data=COMBINE_DATA,
                                use_auxilliary_features=USE_AUXILLIARY_INPUT)
    if USE_AUXILLIARY_INPUT:
        X_train, X_aux, y_train, X_test, test_aux, word_index, sample_text, sample_target = \
            txt_prep.load_and_tokenize(class_list=CLASS_LIST,
                                       sample_index=SENTENCE_NUM)
    else:
        X_train, y_train, X_test, word_index, sample_text, sample_target = \
            txt_prep.load_and_tokenize(class_list=CLASS_LIST,
                                       sample_index=SENTENCE_NUM)

    tc = ToxicClassifier(embedding_dim=EMBEDDING_DIM,
                         num_timesteps=MAX_LENGTH,
                         word_index=word_index,
                         weight_path=WEIGHT_SAVE_PATH,
                         use_aux_input=USE_AUXILLIARY_INPUT,
                         average_attention=AVERAGE_ATTENTION,
                         use_ft=USE_FASTTEXT,
                         visualize=VISUALISE_FULL_ATTENTION)

    # Register training data and the single sample sentence used for the
    # attention demo with the classifier wrapper.
    if USE_AUXILLIARY_INPUT:
        tc.set_input_and_labels(X_train, y_train, X_aux)
        tc.set_sample_sentence(sample_text, X_train[SENTENCE_NUM],
                               y_train[SENTENCE_NUM], X_aux[SENTENCE_NUM])
    else:
        tc.set_input_and_labels(X_train, y_train)
        tc.set_sample_sentence(sample_text, X_train[SENTENCE_NUM],
                               y_train[SENTENCE_NUM])

    tc.build_model(word_index=word_index, use_skipgram=SKIPGRAM)
    tc.model.summary()

    if TRAIN:
        tc.train(max_epochs=MAX_EPOCHS,
                 batch_size=BATCH_SIZE,
                 val_split=VAL_SPLIT,
                 callbacks=callbacks)

        # Predict on the sample sentence and report which toxicity
        # classes exceed the threshold.
        sample_pred = tc.predict_sample_output()
        print('Original sentence: ', sample_text)
        print('Actual label: ', sample_target)
        print('Model prediction :', sample_pred[0, :])
        present_toxicity = get_toxicity_classes(sample_pred[0, :],
                                                TOXICITY_THRESHOLD, CLASS_LIST)
        print_toxicity_report(sample_pred[0, :], TOXICITY_THRESHOLD,
                              CLASS_LIST)

        if VISUALISE_FULL_ATTENTION:
            visualise_attention(tc.attention_history, sample_text)
        else:
            attention = tc.get_attention_output()
            attention /= sum(attention)  # Normalise to percentage
            label = tc.get_sample_labels()
            visualise_attention_with_text(attention, sample_text,
                                          sample_pred[0, :], present_toxicity,
                                          sample_target, label)

    if MAKE_SUBMISSION:
        print('Loading best weights and predicting on test data\n')
        if USE_AUXILLIARY_INPUT:
            make_aux_submission(tc.model,
                                X_test,
                                test_aux,
                                CLASS_LIST,
                                WEIGHT_SAVE_PATH,
                                SUBMISSION_SAVE_PATH,
                                post_process=True)
        else:
            make_submission(tc.model, X_test, CLASS_LIST, WEIGHT_SAVE_PATH,
                            SUBMISSION_SAVE_PATH)
Пример #10
0
    shape = (None, 3, cfg.WIDTH, cfg.HEIGHT)
    predict_fn = models.get_predict_function(m_param, model_weights, file_fmt, shape);

    load_and_process = ld.LoadAndProcess(
            size = (cfg.WIDTH, cfg.HEIGHT),
            augmentation_params = None,
            crop = None,
            color_noise = 0,
            fill_size = cfg.pretrained);

    batch_size = cfg.batch_size;
    test_imgs,test_labels = ld.list_imgs_labels(cfg.data_dir,data='test');
    test_data = ld.ImgStream(test_imgs, test_labels, batch_size,
            cycle=False, file_dir_fmt=cfg.data_dir+'/test/{}',
            load_and_process = load_and_process, preload=None);

    print("num of test cases: {}".format(len(test_data)));

    res = [];
    c = 0;
    for imgs,labels in test_data:
        res.append(predict_fn(imgs));
        c += 1;
        if c%50 == 0:
            print("{} processed ".format(c*batch_size));

    res = np.concatenate(res);
    filename = cfg.output_dir + "/submit_{}.csv".format(fname);
    print(res[-1])
    utils.make_submission(filename, test_imgs, res, 0.5e-3);
Пример #11
0
        convert_type=config['data']['convert_type'])

    logging.disable(logging.FATAL)

    if OOF_PARAMS['save_oof']:
        np.save(f'../logs/{RUN_NAME}/oof.npy', oof)
        save_oof_plot(RUN_NAME, train_y, oof, type_='reg', dia=True)

with t.timer('save features importances'):
    save_importances(RUN_NAME, models, FEATURES)

with t.timer('make submission'):
    output_path = LOGGER_PATH / f'{METER_TYPE}.csv'
    # Fold-averaged predictions (axis=1) written in submission format.
    make_submission(y_pred=np.mean(preds, axis=1),
                    target_name=TARGET_NAME,
                    sample_path=SAMPLE_SUB_PATH,
                    output_path=str(output_path),
                    comp=True)

# Embed the mean CV score into the log directory name.
LOGGER_PATH.rename(f'../logs/{RUN_NAME}_{np.mean(scores):.3f}')

process_minutes = t.get_processing_time()

with t.timer('notify'):
    # Push the run summary to LINE (and Notion, continued below).
    message = f'''{MODEL_NAME}\ncv: {np.mean(scores):.3f}\nscores: {scores}\ntime: {process_minutes:.2f}[min]'''

    send_line(NOTIFY_PARAMS['line']['token'], message)

    send_notion(token_v2=NOTIFY_PARAMS['notion']['token_v2'],
                url=NOTIFY_PARAMS['notion']['url'],
                name=RUN_NAME,
Пример #12
0
    model.add(Dropout(0.5))
    
    model.add(Dense(612, 612, init='glorot_uniform'))
    model.add(PReLU((612,)))
    model.add(BatchNormalization((612,)))
    model.add(Dropout(0.5))
    
    model.add(Dense(612, nb_classes, init='glorot_uniform'))
    model.add(Activation('softmax'))   
    model.compile(loss='categorical_crossentropy', optimizer="adam")
    #model.compile(loss='categorical_crossentropy', optimizer="sgd")
    

print("Training model...")
# Hyper-parameters: epochs, batch size, validation split.
ne = 17
bs = 32
vs = 0.15
model.fit(X, y, nb_epoch=ne, batch_size=bs, validation_split= vs)

print ("Saving model (will overwrite existing one)")
# NOTE(review): %d truncates vs=0.15 to 0, so this yields
# "keras-nn-17-32-0" -- confirm the naming is intended.
filename = "keras-nn-%d-%d-%d"%(ne,bs,vs)
ut.save(model, filename, verbose=True)

print("Generating submission...")
proba = model.predict_proba(X_test)
ut.make_submission(proba, ids, encoder, fname='keras-otto-proba-93.csv')

#print(type(proba))
#print(proba[0:10,])

Пример #13
0
    l=0
    i=0
    while l<len(set_X_test):
    if(len(set_X_test[l])>0):
        set_X_test[l]['CSPL_RECEIVED_CALLS'] =   listPred[i]
        i=i+1
    l=l+1
    """


    #on réassemble les valeurs de prédiction
    resultPred= pd.concat(set_X_test)
    resultPred=resultPred.sort_index()
    incremental_prediction.append(resultPred)
print "score global = ",score_global.mean()

# Reassemble the per-chunk predictions into one frame, ordered by
# date and assignment code.
print("Merging incremental learning...")
resultPred_final=pd.concat(incremental_prediction)
resultPred_final=resultPred_final.sort_values(by=['DATE', 'cod_ASS_ASSIGNMENT'])

print("Make every prediction positif, ceil it ...")
# Clamp negative predictions to zero, then round up to whole calls.
resultPred_final['CSPL_RECEIVED_CALLS']=resultPred_final['CSPL_RECEIVED_CALLS'].apply(lambda x: x*(x>0))
#resultPred_final['CSPL_RECEIVED_CALLS']=resultPred_final['CSPL_RECEIVED_CALLS'].apply(lambda x: 2.5*x)
resultPred_final['CSPL_RECEIVED_CALLS']=resultPred_final['CSPL_RECEIVED_CALLS'].apply(lambda x: math.ceil(x))

print "Write the submission ..."
make_submission(dataTest,resultPred_final)
print "End."

Пример #14
0
# Convert the assembled feature/label lists to numpy arrays.
Y_train=np.array(Y_train)
X_train=np.array(X_train)
X_test=np.array(X_test)


#### Creation of regressor 
reg=Regressor()


#### Cross validation
print "Cross validation ..."
#loo = cross_validation.LeaveOneOut(len(y_df))
# NOTE(review): cv=10 runs 10-fold CV; the "loo" name is a leftover from
# the leave-one-out experiment commented out above.
loo=10
scores = cross_validation.cross_val_score(reg, X_train, Y_train, scoring='mean_squared_error', cv=loo,)
print "The score mean of cross validation : "
print scores.mean()

#### fit 
print "Fit ..."
reg.fit(X_train, Y_train)


#### Prediction
print "Prediction ..."
Y_pred = reg.predict(X_test)

#### write the submission
print "Write the submission ..."
make_submission(dataTest,Y_pred)

print "End."
Пример #15
0
#             pred.iloc[idx] = 0

with t.timer('replace with leak'):
    # Overwrite model predictions with known ("leaked") ground-truth
    # meter readings wherever a test row matches the leak data.
    leak = pd.read_feather(DATA_PATH / 'input/leak.feather')
    leak['timestamp'] = leak['timestamp'].astype(str)
    leak.rename(columns={'meter_reading': 'leak_meter_reading'}, inplace=True)

    test_and_leak = pd.merge(test, leak, on=['building_id', 'meter', 'timestamp'], how='left')
    leak_idx = test_and_leak['leak_meter_reading'].dropna().index
    pred.iloc[leak_idx] = test_and_leak.loc[leak_idx, 'leak_meter_reading']

with t.timer('make submission'):
    # CV score is embedded in the output filename.
    output_path = str(DATA_PATH / f'output/sub_{RUN_NAME}_{cv}.csv')
    make_submission(y_pred=pred, 
                    target_name=TARGET_NAME, 
                    sample_path=SAMPLE_SUB_PATH,
                    output_path=output_path,
                    comp=True)


# LOGGER_PATH.rename(f'../logs/{RUN_NAME}_{np.mean(scores):.3f}')

process_minutes = t.get_processing_time()

with t.timer('notify'):
    # Push the run summary to LINE (and Notion, continued below).
    message = f'''{MODEL_NAME}\ncv: {cv:.3f}\nscores: \ntime: {process_minutes:.2f}[min]'''

    send_line(NOTIFY_PARAMS['line']['token'], message)

    send_notion(token_v2=NOTIFY_PARAMS['notion']['token_v2'],
                url=NOTIFY_PARAMS['notion']['url'],
Пример #16
0
from __future__ import print_function

import numpy as np
import pandas as pd
import utils as ut
import os
import xgboost as xgb
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import PReLU
from keras.utils import np_utils, generic_utils

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

np.random.seed(1337) # for reproducibility

## check if raw data exist
print("Loading data...")
X, labels = ut.load_data('data/train.csv', train=True)
data, ids = ut.load_data('data/test.csv', train=False)

print("Preprocessing labels")
# Only the fitted encoder (and the test ids above) are used by the
# ensembling step; X and y themselves are not needed further.
y, encoder = ut.preprocess_labels(labels)

# Weighted average of two earlier submissions (xgboost 0.4, keras 0.6).
prediction_files = ["xgb-otto-proba-round-430-eta-0.csv", 
                    "keras-otto-proba-93.csv"]
ensemble = ut.ensemble(prediction_files, weights=[0.4, 0.6]) 
ut.make_submission(ensemble, ids, encoder, fname='ensemble-otto-selected-93.csv')
Пример #17
0
    logging.disable(logging.FATAL)

    if 'nn' in MODEL_NAME:
        save_learning_curve(RUN_NAME, models)

    if SETTINGS_PARAMS['oof']['save']:
        np.save(f'../logs/{RUN_NAME}/oof.npy', oof)
        save_oof_plot(RUN_NAME, train_y, oof, type_='reg', dia=True)

with t.timer('save features importances'):
    save_importances(RUN_NAME, models, FEATURES)

with t.timer('make submission'):
    # Fold-averaged predictions; mean CV score embedded in the filename.
    output_path = f'../data/output/{RUN_NAME}_{np.mean(scores):.3f}.csv'
    make_submission(y_pred=np.mean(preds, axis=1), target_name=COMPE_PARAMS['target_name'],
                    sample_path=PATH_PARAMS['sample'], output_path=str(output_path), comp=False)

# Embed the mean CV score into the log directory name.
LOGGER_PATH.rename(f'../logs/{RUN_NAME}_{np.mean(scores):.3f}')

process_minutes = t.get_processing_time()

with t.timer('notify'):
    # Push the run summary to LINE (and Notion, continued below).
    message = f'''{MODEL_NAME}\ncv: {np.mean(scores):.3f}\nscores: {scores}\ntime: {process_minutes:.2f}[min]'''

    send_line(NOTIFY_PARAMS['line']['token'], message)

    send_notion(token_v2=NOTIFY_PARAMS['notion']['token_v2'],
                url=NOTIFY_PARAMS['notion']['url'],
                name=RUN_NAME,
                created=NOW,
                model=MODEL_NAME.split('_')[0],
Пример #18
0
    X_pred = load_predictions("stack/*-fold%d.npy" % i)
    X_fold = np.hstack((X_fold, X_pred))

    all_X.append(X_fold)
    all_y.append(y_fold)
    all_w.append(w_fold)

X = np.vstack(all_X)
y = np.concatenate(all_y)
w = np.concatenate(all_w)

clf = Classifier(**params)
w = rescale(w)
w = rebalance(y, w)

try:
    clf.fit(X, y, sample_weight=w)
except:
    clf.fit(X, y)

# And make a submussion
print "Making submission..."
X_test, _, _, _ = load_test()
X_pred = load_predictions("stack/*-test.npy")
X_test = np.hstack((X_test, X_pred))

make_submission(clf, threshold, "output-stacking.csv", X_test=X_test)

import IPython
IPython.embed()
Пример #19
0
model.summary()
# %%
X, y = get_data(as_gray=False)

batch_size = 128
# Callbacks: ROC-AUC tracking, early stopping on val_auc, and a
# best-checkpoint writer keyed on val_auc.
ra = ROCAUC(batch_size)
es = EarlyStopping(monitor='val_auc', patience=2, mode='max')
mc = ModelCheckpoint(f'data/models/model.h5',
                     monitor='val_auc',
                     save_best_only=True,
                     mode='max',
                     verbose=1)
model.fit(X,
          y,
          batch_size=batch_size,
          epochs=50,
          validation_split=.2,
          callbacks=[ra, es, mc])

# %%
# NOTE(review): this second fit (full data, no validation, 5 epochs)
# continues training past the early-stopped weights -- confirm intended.
model.fit(X, y, batch_size=batch_size, epochs=5)

# %%
X_test, test_ids = get_data(test=True, as_gray=False)

# Flatten the (n, 1) probability output into a 1-D vector for submission.
test_predictions = model.predict(X_test, batch_size=batch_size)
test_predictions = test_predictions.flatten()
make_submission(test_ids, test_predictions,
                'submissions/first_transfer_cnn.csv')
Пример #20
0
 def on_epoch_end(self, epoch, logs={}):
     """Keras callback hook: after each epoch, predict the final word on
     the held-out file and score the resulting submission.

     NOTE(review): ``logs={}`` is a mutable default argument, and ``opt``
     is read from module scope rather than passed in -- confirm both are
     intentional.
     """
     x, y = self.test_data
     predict_dict = predict_final_word(self.model, self.vocabulary, self.filename)
     sub_file = make_submission(predict_dict, opt.student_id, opt.input)
     scoring(sub_file, os.path.join("data"), type="valid")
Пример #21
0
def main(opt):
    """Train a model, build/save an ensemble from saved member models, or
    evaluate a saved model, depending on ``opt.mode``.

    Relies on module-level helpers (load_data, build_model, TestCallback,
    build_save_ensemble_model, predict_final_word, make_submission,
    scoring) defined elsewhere in the file.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu
    np.random.seed(opt.seed)  # set a seed for reproducibility
    if opt.mode == "train":
        st = time.time()
        print('Loading data')
        x_train, y_train, x_valid, y_valid, vocabulary_size = load_data(
            "data", opt.debug)

        num_training_data = x_train.shape[0]
        sequence_length = x_train.shape[1]
        print(num_training_data)

        print('Vocab Size', vocabulary_size)

        model = build_model(opt.model, opt.embedding_dim, opt.hidden_size,
                            opt.drop, opt.filter, sequence_length,
                            vocabulary_size)
        adam = Adam()
        model.compile(loss='sparse_categorical_crossentropy', optimizer=adam)
        print("Training Model...")  # fixed typo ("Traning")
        # Checkpoint the best validation-loss weights; stop early after 5
        # stagnant epochs.
        checkpoint = ModelCheckpoint(opt.saved_model,
                                     monitor='val_loss',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='min')
        early = EarlyStopping(monitor="val_loss", mode="min", patience=5)
        model.fit(x_train,
                  y_train,
                  batch_size=opt.batch_size,
                  epochs=100,
                  verbose=1,
                  validation_data=(x_valid, y_valid),
                  callbacks=[
                      TestCallback((x_valid, y_valid), model=model),
                      checkpoint, early
                  ])
        model.save(opt.saved_model)
        print("Training cost time: ", time.time() - st)
    elif opt.mode == "ensemble":
        x_train, y_train, x_valid, y_valid, vocabulary_size = load_data(
            "data", opt.debug)

        num_training_data = x_train.shape[0]
        sequence_length = x_train.shape[1]
        print(num_training_data)

        print('Vocab Size', vocabulary_size)

        # Collect the filenames in the top level of the ensemble dir
        # (break after the first os.walk yield = no recursion).
        ENSEMBLE_DIR = "models/ensemble/"
        model_files = []
        for dirpath, dirnames, filenames in os.walk(ENSEMBLE_DIR):
            model_files.extend(filenames)
            break
        models = []
        for count, filename in enumerate(model_files):
            member = load_model(ENSEMBLE_DIR + filename)
            # NOTE(review): assigning ``.name`` only works on older Keras
            # versions -- confirm against the pinned dependency.
            member.name = "model" + str(count)
            models.append(member)

        build_save_ensemble_model(opt.saved_model, models, sequence_length)
    else:
        model = load_model(opt.saved_model)
        # Context manager so the vocab file handle is closed.
        with open(os.path.join("data", "vocab.json")) as f:
            vocabulary = json.load(f)
        predict_dict = predict_final_word(model, vocabulary, opt.input)
        sub_file = make_submission(predict_dict, opt.student_id, opt.input)
        if opt.score:
            scoring(sub_file, os.path.join("data"), type="valid")
Пример #22
0
    X_TS = utils.create_fingerprints(TS["SMILES"].values)

    if METHOD == "DT":
        depths, scores = doDecisionTree(X_LS, Y_LS)
        print(scores)

    elif METHOD == "KNN":
        depths, scores = doKNN(X_LS, Y_LS)
        print(scores)

        classifier_knn = KNeighborsClassifier(n_neighbors=50)
        classifier_knn.fit(X_LS, Y_LS)
        pred = classifier_knn.predict_proba(X_TS)
        auc_predicted = 0.7
        fname = utils.make_submission(pred[:, 1], auc_predicted, 'knn_50')
        print('Submission file "{}" successfully written'.format(fname))

    elif METHOD == "RF":
        #ts, depths, scores = doRandomForest(X_LS, Y_LS)
        #print(scores)

        classifier_rf = RandomForestClassifier(n_estimators=800, max_depth=700)
        classifier_rf.fit(X_LS, Y_LS)
        pred = classifier_rf.predict_proba(X_TS)
        auc_predicted = 0.78
        fname = utils.make_submission(pred[:, 1], auc_predicted, 'final')
        print('Submission file "{}" successfully written'.format(fname))

    elif METHOD == "MLP":
        layers, neurones, scores = doMLP(X_LS, Y_LS)
Пример #23
0
def main():
    t = Timer()
    seed_everything(cfg.common.seed)

    logger_path.mkdir(exist_ok=True)
    logging.basicConfig(filename=logger_path / 'train.log', level=logging.DEBUG)

    dh.save(logger_path / 'config.yml', cfg)

    with t.timer('load data'):
        train_x = dh.load('../data/input/train_concated.csv')
        train_org_x = dh.load('../data/input/train.csv')
        train_2019_x = dh.load('../data/input/train_2019.csv')
        test_x = dh.load('../data/input/test.csv')

    with t.timer('make folds'):
        fold_org_df = factory.get_fold(cfg.validation.val1, train_org_x, train_org_x[[cfg.common.target]])
        fold2019_df = factory.get_fold(cfg.validation.val2, train_2019_x, train_2019_x[[cfg.common.target]])
        fold_df = pd.concat([fold_org_df, fold2019_df], axis=0, sort=False, ignore_index=True)
        if cfg.validation.val1.single:
            fold_df = fold_df[['fold_0']]
            fold_df /= fold_df['fold_0'].max()

    with t.timer('load features'):
        features = dh.load('../configs/feature/all.yml')['features']
        for f in features:
            train_x[f] = dh.load(f'../features/{f}_train.feather')[f].fillna(-1)
            test_x[f] = dh.load(f'../features/{f}_test.feather')[f].fillna(-1)

    with t.timer('drop several rows'):
        if cfg.common.drop is not None:
            drop_idx = factory.get_drop_idx(cfg.common.drop)
            train_x = train_x.drop(drop_idx, axis=0).reset_index(drop=True)
            fold_df = fold_df.drop(drop_idx, axis=0).reset_index(drop=True)

    with t.timer('train model'):
        result = train_model(run_name, train_x, fold_df, cfg)
    
    logging.disable(logging.FATAL)
    run_name_cv = f'{run_name}_{result["cv"]:.3f}'
    logger_path.rename(f'../logs/{run_name_cv}')

    with t.timer('predict'):
        preds = predict_test(run_name_cv, test_x, fold_df, cfg)

    with t.timer('post process'):
        duplicates = {
            'ISIC_5224960': 1,
            'ISIC_9207777': 1,
            'ISIC_6457527': 1,
            'ISIC_8347588': 0,
            'ISIC_8372206': 1,
            'ISIC_9353360': 1,
            'ISIC_3689290': 0,
            'ISIC_3584949': 0,  
        }
        for image_name, target in duplicates.items():
            idx = test_x[test_x['image_name'] == image_name].index[0]
            preds[idx] = target

    with t.timer('make submission'):
        sample_path = f'../data/input/sample_submission.csv'
        output_path = f'../data/output/{run_name_cv}.csv'
        make_submission(y_pred=preds,
                        target_name=cfg.common.target,
                        sample_path=sample_path,
                        output_path=output_path,
                        comp=False)

    with t.timer('kaggle api'):
        kaggle = Kaggle(cfg.compe.compe_name, run_name_cv)
        if cfg.common.kaggle.submit:
            kaggle.submit(comment)

    with t.timer('notify'):
        process_minutes = t.get_processing_time()
        message = f'''{model_name}\ncv: {result["cv"]:.3f}\ntime: {process_minutes:.2f}[h]'''
        send_line(notify_params.line.token, message)

        notion = Notion(token=notify_params.notion.token_v2)
        notion.set_url(url=notify_params.notion.url)
        notion.insert_rows({
            'name': run_name_cv,
            'created': now,
            'model': cfg.model.name,
            'local_cv': round(result['cv'], 4),
            'time': process_minutes,
            'comment': comment
        })
Пример #24
0
def main():
    """Feature-based training pipeline: load features, build folds,
    train with cross-validation, predict the test set, write a
    submission, and send notifications.

    Relies on module-level globals defined elsewhere in the file:
    cfg, features, features_params, run_name, logger_path, dh, factory,
    notify_params, options, comment, now.
    """
    t = Timer()
    seed_everything(cfg.common.seed)

    logger_path.mkdir(exist_ok=True)
    logging.basicConfig(filename=logger_path / 'train.log',
                        level=logging.DEBUG)

    # Snapshot the config and feature list next to the run's logs.
    dh.save(logger_path / 'config.yml', cfg)
    dh.save(logger_path / 'features.yml', features_params)

    with t.timer('load data'):
        train_df = dh.load('../data/input/train.csv')
        train2019_df = dh.load('../data/input/train_concated.csv')
        train_x = factory.get_features(features, cfg.data.loader.train)
        test_x = factory.get_features(features, cfg.data.loader.test)
        train_y = factory.get_target(cfg.data.target)

    with t.timer('add oof'):
        # Optionally stack a previous model's out-of-fold predictions
        # as an extra feature column.
        if cfg.data.features.oof.name is not None:
            oof, preds = factory.get_oof(cfg.data)
            train_x['oof'] = oof
            test_x['oof'] = preds
            features.append('oof')

    with t.timer('make folds'):
        fold_df = factory.get_fold(cfg.validation, train_df,
                                   train_df[['target']])
        # Pad with zero rows so the fold frame matches the length of the
        # concatenated (original + 2019) training data; presumably the
        # padded rows never enter validation — TODO confirm.
        fold_df = pd.concat([
            fold_df,
            pd.DataFrame(np.zeros((len(train2019_df), len(fold_df.columns))),
                         columns=fold_df.columns)
        ],
                            axis=0,
                            sort=False,
                            ignore_index=True)
        if cfg.validation.single:
            fold_df = fold_df[['fold_0']]
            # Presumably rescales fold labels into [0, 1] weights for
            # single-fold validation — TODO confirm.
            fold_df /= fold_df['fold_0'].max()

    with t.timer('drop index'):
        # Drop configured rows from data, target and folds in lockstep.
        if cfg.common.drop is not None:
            drop_idx = factory.get_drop_idx(cfg.common.drop)
            train_x = train_x.drop(drop_idx, axis=0).reset_index(drop=True)
            train_y = train_y.drop(drop_idx, axis=0).reset_index(drop=True)
            fold_df = fold_df.drop(drop_idx, axis=0).reset_index(drop=True)

    with t.timer('prepare for ad'):
        # Adversarial validation: replace the target with a train/test
        # discriminator label when enabled.
        if cfg.data.adversarial_validation:
            train_x, train_y = factory.get_ad(cfg, train_x, test_x)

    with t.timer('train and predict'):
        trainer = Trainer(cfg)
        cv = trainer.train(train_df=train_x,
                           target_df=train_y,
                           fold_df=fold_df)
        preds = trainer.predict(test_x)
        trainer.save(run_name)

        # Rename the log directory to include the CV score.
        run_name_cv = f'{run_name}_{cv:.3f}'
        logger_path.rename(f'../logs/{run_name_cv}')
        logging.disable(logging.FATAL)

    with t.timer('make submission'):
        sample_path = f'../data/input/sample_submission.csv'
        output_path = f'../data/output/{run_name_cv}.csv'
        make_submission(y_pred=preds,
                        target_name=cfg.data.target.name,
                        sample_path=sample_path,
                        output_path=output_path,
                        comp=False)
        if cfg.common.kaggle.submit:
            kaggle = Kaggle(cfg.compe.name, run_name_cv)
            kaggle.submit(comment)

    with t.timer('notify'):
        # Push the run summary to LINE and record it in Notion.
        process_minutes = t.get_processing_time()
        message = f'''{cfg.model.name}\ncv: {cv:.3f}\ntime: {process_minutes}[min]'''
        send_line(notify_params.line.token, message)

        notion = Notion(token=notify_params.notion.token_v2)
        notion.set_url(url=notify_params.notion.url)
        notion.insert_rows({
            'name': run_name_cv,
            'created': now,
            'model': options.model,
            'local_cv': round(cv, 4),
            'time': process_minutes,
            'comment': comment
        })
Пример #25
0
def main():
    """Blend out-of-fold predictions from several trained models.

    Loads each component model's OOF and raw test predictions, searches
    for convex blend weights with Optuna, evaluates the blended CV
    score, writes the ensemble predictions, and optionally submits to
    Kaggle and sends notifications.

    Relies on module-level globals defined elsewhere in the file:
    cfg, run_name, logger_path, dh, factory, notify_params, options,
    comment, now.
    """
    t = Timer()
    seed_everything(cfg.common.seed)

    logger_path.mkdir(exist_ok=True)

    dh.save(logger_path / 'config.yml', cfg)

    with t.timer('load data'):
        train_df = dh.load('../data/input/train_data.csv')
        test_df = dh.load('../data/input/test_data.csv')

        # One column of OOF / test predictions per component model.
        oof = np.zeros((len(train_df), len(cfg.models)))
        preds = np.zeros((len(test_df), len(cfg.models)))

        for i, m in enumerate(cfg.models):
            name = getattr(cfg.models, m).name

            log_dir = Path(f'../logs/{name}')
            model_oof = dh.load(log_dir / 'oof.npy')
            model_cfg = dh.load(log_dir / 'config.yml')
            if model_cfg.common.drop:
                # Re-insert rows this model dropped during training so
                # its OOF array lines up with train_df.
                drop_idxs = np.array([])
                for drop_name in model_cfg.common.drop:
                    drop_idx = dh.load(f'../pickle/{drop_name}.npy')
                    drop_idxs = np.append(drop_idxs, drop_idx)
                # BUG FIX: pass the accumulated drop_idxs; the original
                # passed only the last drop_idx loaded in the loop.
                model_oof = factory.fill_dropped(model_oof, drop_idxs)

            model_preds = dh.load(f'../logs/{name}/raw_preds.npy')

            oof[:, i] = model_oof[:len(train_df)]
            preds[:, i] = model_preds

    with t.timer('drop index'):
        if cfg.common.drop is not None:
            drop_idxs = np.array([])
            # BUG FIX: iterate this run's cfg.common.drop; the original
            # iterated model_cfg.common.drop, a leftover loop variable
            # from the last model loaded above.
            for drop_name in cfg.common.drop:
                drop_idx = dh.load(f'../pickle/{drop_name}.npy')
                drop_idxs = np.append(drop_idxs, drop_idx)
            train_df = train_df.drop(drop_idxs, axis=0).reset_index(drop=True)

    with t.timer('optimize model weight'):
        metric = factory.get_metrics(cfg.common.metrics.name)
        y_true = train_df[cfg.common.target]

        def objective(trial):
            # Sample a convex weight vector: each weight is drawn from
            # what remains and the last weight takes the remainder.
            p_list = [0 for i in range(len(cfg.models))]
            for i in range(len(cfg.models) - 1):
                p_list[i] = trial.suggest_discrete_uniform(f'p{i}', 0.0, 1.0 - sum(p_list), 0.01)
            p_list[-1] = round(1 - sum(p_list[:-1]), 2)

            y_pred = np.zeros(len(train_df))
            for i in range(oof.shape[1]):
                y_pred += oof[:, i] * p_list[i]

            return metric(y_true, y_pred)

        study = optuna.create_study(direction='minimize')
        study.optimize(objective, timeout=10)
        best_params = list(study.best_params.values())
        # The final weight is implied by the others (weights sum to 1).
        best_weight = best_params + [round(1 - sum(best_params), 2)]

    with t.timer('ensemble'):
        # Apply the best weights to both OOF and test predictions.
        ensemble_oof = np.zeros(len(train_df))
        ensemble_preds = np.zeros(len(test_df))
        for i in range(len(best_weight)):
            ensemble_oof += oof[:, i] * best_weight[i]
            ensemble_preds += preds[:, i] * best_weight[i]

        dh.save(f'../logs/{run_name}/oof.npy', ensemble_oof)
        dh.save(f'../logs/{run_name}/raw_preds.npy', ensemble_preds)

        cv = metric(y_true, ensemble_oof)
        run_name_cv = f'{run_name}_{cv:.3f}'
        logger_path.rename(f'../logs/{run_name_cv}')

        print('\n\n===================================\n')
        print(f'CV: {cv:.4f}')
        print(f'BEST WEIGHT: {best_weight}')
        print('\n===================================\n\n')

    with t.timer('make submission'):
        sample_path = f'../data/input/sample_submission.feather'
        output_path = f'../data/output/{run_name_cv}.csv'
        make_submission(y_pred=ensemble_preds,
                        target_name=cfg.common.target,
                        sample_path=sample_path,
                        output_path=output_path,
                        comp=False)
        if cfg.common.kaggle.submit:
            kaggle = Kaggle(cfg.compe.compe_name, run_name_cv)
            kaggle.submit(comment)

    with t.timer('notify'):
        # Push the run summary to LINE and record it in Notion.
        process_minutes = t.get_processing_time()
        message = f'''{options.model}\ncv: {cv:.3f}\ntime: {process_minutes}[min]'''
        send_line(notify_params.line.token, message)

        notion = Notion(token=notify_params.notion.token_v2)
        notion.set_url(url=notify_params.notion.url)
        notion.insert_rows({
            'name': run_name_cv,
            'created': now,
            'model': options.model,
            'local_cv': round(cv, 4),
            'time': process_minutes,
            'comment': comment
        })
Пример #26
0
# check correlation between base learner predictions
np.corrcoef(X_train_level_2.T)
sns.jointplot(X_train_level_2[:,0], X_train_level_2[:,1])
plt.show()

# Simple convex combination of the two base learners: grid-search the
# mixing weight alpha that minimises validation RMSE.
alphas_to_try = np.linspace(0, 1, 1001)
# FIX: np.Inf was removed in NumPy 2.0; np.inf is the supported name.
rmse_best = np.inf
for alpha in alphas_to_try:
    mix = alpha * X_train_level_2[:,0] + (1-alpha) * X_train_level_2[:,1]
    rmse_new = np.sqrt(mean_squared_error(Y_train_level_2, mix))
    if rmse_new < rmse_best:
        alpha_best = alpha
        rmse_best = rmse_new

score = round(rmse_best, 6)
# Blend the level-2 test predictions with the best weight found above.
pred_test = alpha_best * X_test_level_2[:,0] + (1-alpha_best) * X_test_level_2[:,1]
ids = np.array(df.loc[df['date_block_num'] == 34, 'ID'])
submission = make_submission(ids, np.array(pred_test).flatten())

# export: one timestamped folder per submission, score in the name
today = datetime.datetime.now()
sub_id = today.strftime('%y%m%d') + '_' + today.strftime("%H%M") + \
		'_score_' + str(score)
folder = OUT_FOLDER + '/' + sub_id
os.mkdir(folder)
print('\n---- ' + sub_id + ' ----')
submission.to_csv(os.path.join(folder, 'submission.csv'), index=False)

Пример #27
0
# 5 fold cross validation
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
# One out-of-fold probability per sample; dtype='object' so unfilled
# slots are None rather than uninitialised floats.
prediction_scores = np.empty(y.shape[0], dtype='object')

for train_idx, val_idx in tqdm(skf.split(X, y)):
    X_train, X_val = X[train_idx], X[val_idx]
    y_train = y[train_idx]

    # Refit the (module-level) classifier on this fold's training split.
    clf = clf.fit(X_train, y_train)
    y_pred = clf.predict_proba(X_val)[:, 1]

    # Save the predictions for this fold
    prediction_scores[val_idx] = y_pred

# Plot and save the out-of-fold ROC curve and prediction samples.
plt.title('SVM 5-fold cross validation ROC AUC')
plot_roc(y, prediction_scores)
plt.savefig('report/figures/svm_roc.png', dpi=300)

plot_prediction_samples(imgs, y, prediction_scores, 'SVM Prediction Samples')
plt.savefig('report/figures/svm_confmat.png', dpi=300)
# %%

# load and preprocess test data then create submission
X_test, test_ids = get_data(test=True)
X_test = np.stack([get_HOG(img, **hog_params) for img in X_test])

# Retrain on the full training set before predicting the test set.
clf = clf.fit(X, y)
test_predictions = clf.predict_proba(X_test)[:, 1]
make_submission(test_ids,
                test_predictions,
                fname='submissions/svc_10_hog_16_4_fulltrain.csv')
# Load the pickled test inputs and training labels (X_train is loaded
# earlier, outside this excerpt).
X_test = read_pickle('../audio_data/X_test4d.pkl')
Y_train = read_pickle('../audio_data/Y_train1d.pkl')
print("The shape of X_train/X_test/Y_train: ", X_train.shape, X_test.shape, Y_train.shape)

# Instantiate the model
bigan = BIGAN(X_train.shape[1], X_train.shape[2], X_train.shape[3])

if is_trainable:
    # Training the BiGAN
    bigan.train_by_batch(X_train, epochs=NUM_EPOCHS, batch_size=BATCH_SIZE)
    #bilstm.train_all(X_train_, Y_train_, BATCH_SIZE, NUM_EPOCHS)
else:
    # Restore the checkpoint
    # NOTE(review): tf.train.Checkpoint() is constructed with no tracked
    # objects, so restore() has nothing to map into and expect_partial()
    # suppresses the resulting warnings — confirm the model weights are
    # actually restored here.
    checkpoint_dir = './runs/checkpoint_bigan'
    checkpoint = tf.train.Checkpoint()
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir)).expect_partial()
    print("Checkpoint restored for Anomaly Detection!")

    # Anomaly Detection
    AS = bigan.compute_anomaly_score(X_train, Y_train, X_test)
    # Prediction: threshold chosen so the expected fraction of outliers
    # matches NUM_OUTLIERS.
    ts = NUM_OUTLIERS/len(X_test) # Find out the best threshold
    Y_pred_AS = bigan.predict_outlier(AS, ts)
    #print("Y_pred_AS: ", Counter(Y_pred_AS))

    # Generate final Y_pred and make submission
    Y_pred = np.load('Y_pred.npy')
    Y_pred_new = gen_Y_pred(Y_pred, Y_pred_AS)
    print("Y_pred_new.shape: ", Y_pred_new.shape)
    make_submission(Y_pred_new, "submission")
Пример #29
0
# Per-fold CNN training; keeps only folds whose validation AUC is
# acceptable (see the early break below).
models = []
for train_idx, val_idx in skf.split(X, y):
    X_train, X_val = X[train_idx], X[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]
    model = get_model()
    y_train = to_categorical(y_train)
    # Train from the augmenting generator for a fixed number of epochs.
    model.fit_generator(imagen.flow(X_train, y_train, batch_size=BATCH_SIZE),
                        steps_per_epoch=batch_per_epoch,
                        epochs=EPOCHS,
                        verbose=0)
    # Store class-1 probabilities as out-of-fold predictions.
    prediction_scores[val_idx] = model.predict(X_val, batch_size=BATCH_SIZE)[:,
                                                                             1]
    cur_auc = roc_auc_score(y_val, prediction_scores[val_idx])
    print(cur_auc)
    # NOTE(review): aborting on a weak fold leaves the remaining folds'
    # prediction_scores unfilled, which distorts the overall AUC printed
    # below — confirm this early exit is intentional.
    if cur_auc < 0.8:
        break
    models.append(model)

print(roc_auc_score(y, prediction_scores))

# %%
# Load test images and scale pixels to [0, 1].
X_test, test_ids = get_data(test=True, as_gray=False)
X_test = X_test / 255.

# Average the fold models' class-1 probabilities over the test set.
test_predictions = np.mean(
    [m.predict(X_test, batch_size=BATCH_SIZE)[:, 1] for m in models], axis=0)

make_submission(test_ids, test_predictions, 'submissions/homebrew_cnn_CV.csv')
# %%
# Persist each fold model (comprehension used for its side effect only).
[m.save(f'data/models/model_fold_{i}.h5') for i, m in enumerate(models)]
Пример #30
0
    # Fingerprint features for the learning-set molecules (SMILES).
    X_LS = fingerprints.transform(LS['SMILES'].values, FINGERPRINT)
    y_LS = LS['ACTIVE'].values

    # Variance threshold (feature selection): drop constant features.
    selector = VarianceThreshold()
    selector.fit(X_LS)
    X_LS = selector.transform(X_LS)

    # Cross validation score
    cv = ShuffleSplit(n_splits=5, test_size=0.25, random_state=0)
    scores = cross_val_score(MODEL, X_LS, y_LS, cv=cv, scoring='roc_auc')

    # Estimated AUC (mean over the CV splits; written into the submission)
    AUC = scores.mean()

    # Train model on the full learning set
    MODEL.fit(X_LS, y_LS)

    # Create fingerprint features of test set and apply the selector
    # fitted on the learning set.
    X_TS = fingerprints.transform(TS['SMILES'].values, FINGERPRINT)
    X_TS = selector.transform(X_TS)

    # Predict probability of the positive class
    prob = MODEL.predict_proba(X_TS)[:, -1]

    # Writing the submission file
    os.makedirs(DESTINATION, exist_ok=True)
    fname = utils.make_submission(prob, AUC, DESTINATION + 'submission')

    print('Submission file "{}" successfully written'.format(fname))