Пример #1
0
def write_result(ids, predicts, ofile, is_infer=True):
    """Write per-impression prediction scores to ``ofile``.

    Args:
        ids: Mapping-like object providing ``'impression_id'`` and
            ``'position'`` entries, aligned element-wise with ``predicts``.
        predicts: Raw model outputs; they are passed through
            ``gezi.sigmoid`` before being written (presumably logits --
            confirm against the caller).
        ofile: Destination path of the score file.
        is_infer: When true, additionally write ``prediction.txt`` (ranks
            instead of scores) next to ``ofile`` and pack it into
            ``prediction.zip``.

    When ``FLAGS.infer_part`` is set, only the flat, ungrouped rows are
    written to ``<ofile stem>.<infer_part>.csv`` and the function returns
    without producing the grouped or ranked outputs.
    """
    import zipfile

    x = ids
    impression_id = x['impression_id']
    prob = gezi.sigmoid(predicts)
    position = x['position']
    df = pd.DataFrame({
        'impression_id': impression_id,
        'position': position,
        'score': prob
    })

    if FLAGS.infer_part is not None:
        # The f-prefix is required: without it the literal text
        # '{FLAGS.infer_part}' ended up in the file name.
        ofile = ofile.replace('.csv', f'.{FLAGS.infer_part}.csv')
        df.to_csv(ofile, index=False, header=False, sep=' ')
        return

    # Sort so that each impression's scores are listed in position order.
    df = df.sort_values(['impression_id', 'position'])
    tqdm.pandas()  # registers ``progress_apply`` on pandas objects
    df = df.groupby('impression_id')['score'].progress_apply(list).reset_index(
        name='scores')
    df['scores2'] = df.scores.progress_apply(
        lambda x: '[' + ','.join(map(str, x)) + ']')
    df[['impression_id', 'scores2']].to_csv(ofile,
                                            index=False,
                                            header=False,
                                            sep=' ')
    if is_infer:
        # Double argsort of the negated scores yields each item's 1-based
        # rank in descending score order (highest score -> rank 1).
        df['scores2'] = df.scores.progress_apply(lambda x: '[' + ','.join(
            map(str, (-np.asarray(x)).argsort().argsort() + 1)) + ']')
        # ``dirname`` is '' for a bare file name; fall back to the current
        # directory instead of accidentally targeting the filesystem root.
        odir = os.path.dirname(ofile) or '.'
        prediction_path = os.path.join(odir, 'prediction.txt')
        df[['impression_id', 'scores2']].to_csv(prediction_path,
                                                index=False,
                                                header=False,
                                                sep=' ')
        # Build the archive in-process: no dependency on an external ``zip``
        # binary and no shell string that breaks on unusual paths.
        with zipfile.ZipFile(os.path.join(odir, 'prediction.zip'),
                             'w',
                             compression=zipfile.ZIP_DEFLATED) as archive:
            archive.write(prediction_path, arcname='prediction.txt')
Пример #2
0
def evaluate(y_true, y_pred, x):
    """Assemble a per-example evaluation frame and score it via ``evaluate_df``.

    Args:
        y_true: Ground-truth labels, one per example.
        y_pred: Raw model outputs; converted with ``gezi.sigmoid`` into the
            ``y_prob`` column.
        x: Mapping-like object providing ``'impression_id'``, ``'uid'``,
            ``'did'``, ``'hist_len'``, ``'uid_in_train'`` and
            ``'did_in_train'``.

    Returns:
        Whatever ``evaluate_df`` returns for the assembled DataFrame.
    """
    # (output column, key in ``x``); only 'hist_len' is renamed on the way in.
    passthrough = (
        ('impression_id', 'impression_id'),
        ('uid', 'uid'),
        ('did', 'did'),
        ('history_len', 'hist_len'),
        ('uid_in_train', 'uid_in_train'),
        ('did_in_train', 'did_in_train'),
    )

    columns = {'y_true': y_true, 'y_prob': gezi.sigmoid(y_pred)}
    for column, key in passthrough:
        columns[column] = x[key]

    return evaluate_df(pd.DataFrame(columns))
Пример #3
0
def valid_write(ids, labels, predicts, out):
    """Write one ``id,label,probability`` line per example to ``out``.

    Args:
        ids: Iterable of example identifiers.
        labels: Iterable of labels, aligned with ``ids``.
        predicts: Iterable of raw model outputs; each is passed through
            ``gezi.sigmoid`` and printed with three decimal places.
        out: Writable text stream that receives the lines.
    """
    row_format = '{},{},{:.3f}'
    for example_id, label, raw_score in zip(ids, labels, predicts):
        line = row_format.format(example_id, label, gezi.sigmoid(raw_score))
        print(line, file=out)
Пример #4
0
def evaluate(y, y_, model_path=None):
    """Compute AUC and log loss for raw predictions against labels.

    Args:
        y: Ground-truth labels.
        y_: Raw model outputs; converted with ``gezi.sigmoid`` before scoring.
        model_path: Accepted but not used by this function.

    Returns:
        A pair ``([auc, loss], ['auc', 'loss'])``: the metric values followed
        by their names, in matching order.
    """
    probs = gezi.sigmoid(y_)
    metric_names = ['auc', 'loss']
    # ``roc_auc_score`` / ``log_loss`` are presumably the scikit-learn
    # metrics -- confirm against the file's imports.
    metric_values = [roc_auc_score(y, probs), log_loss(y, probs)]
    return metric_values, metric_names