def write_result(ids, predicts, ofile, is_infer=True):
    """Write per-impression prediction scores to ``ofile``.

    Args:
        ids: Mapping (or DataFrame-like) indexed by ``'impression_id'`` and
            ``'position'``; both entries must align row-for-row with
            ``predicts``.
        predicts: Raw model outputs; they are passed through
            ``gezi.sigmoid`` before being written.
        ofile: Destination path of the score file.
        is_infer: When true, additionally write ``prediction.txt`` (per
            impression ranks, 1 = highest score) and ``prediction.zip`` next
            to ``ofile``.

    Returns:
        None.

    Behavior:
        * If ``FLAGS.infer_part`` is set, only the flat
          ``impression_id position score`` rows are written, to a
          part-specific file (``foo.csv`` -> ``foo.<part>.csv``), and the
          function returns early.
        * Otherwise scores are grouped per impression (ordered by position)
          and written as ``impression_id [s1,s2,...]``.
    """
    # Function-scope import so the block does not depend on a file-level
    # import being present.
    import zipfile

    def _bracketed(values):
        # Render an iterable as "[v1,v2,...]" with no spaces.
        return '[' + ','.join(map(str, values)) + ']'

    def _ranks(scores):
        # Double argsort of the negated scores gives each item's 1-based
        # rank, with rank 1 assigned to the highest score.
        return (-np.asarray(scores)).argsort().argsort() + 1

    impression_id = ids['impression_id']
    prob = gezi.sigmoid(predicts)
    position = ids['position']
    df = pd.DataFrame({
        'impression_id': impression_id,
        'position': position,
        'score': prob,
    })

    if FLAGS.infer_part is not None:
        # The part id must actually be interpolated (f-string); otherwise
        # every part is written to the same literally-named file.
        ofile = ofile.replace('.csv', f'.{FLAGS.infer_part}.csv')
        df.to_csv(ofile, index=False, header=False, sep=' ')
        return

    df = df.sort_values(['impression_id', 'position'])
    tqdm.pandas()  # registers progress_apply on pandas objects
    df = df.groupby('impression_id')['score'].progress_apply(list).reset_index(
        name='scores')
    df['scores2'] = df.scores.progress_apply(_bracketed)
    df[['impression_id', 'scores2']].to_csv(ofile,
                                            index=False,
                                            header=False,
                                            sep=' ')

    if is_infer:
        df['scores2'] = df.scores.progress_apply(
            lambda scores: _bracketed(_ranks(scores)))

        # A bare filename has an empty dirname; fall back to the current
        # directory instead of accidentally targeting the filesystem root.
        odir = os.path.dirname(ofile) or '.'
        prediction_path = os.path.join(odir, 'prediction.txt')
        df[['impression_id', 'scores2']].to_csv(prediction_path,
                                                index=False,
                                                header=False,
                                                sep=' ')

        # Build the archive with the standard library rather than shelling
        # out to `zip`: works for paths containing spaces and does not need
        # an external binary.
        archive_path = os.path.join(odir, 'prediction.zip')
        with zipfile.ZipFile(archive_path, 'w',
                             compression=zipfile.ZIP_DEFLATED) as archive:
            archive.write(prediction_path, arcname='prediction.txt')
def evaluate(y_true, y_pred, x):
    """Assemble a per-example frame and score it with ``evaluate_df``.

    Args:
        y_true: Ground-truth labels, one per example.
        y_pred: Raw model outputs; converted with ``gezi.sigmoid``.
        x: Mapping providing the ``impression_id``, ``uid``, ``did``,
            ``hist_len``, ``uid_in_train`` and ``did_in_train`` entries.

    Returns:
        Whatever ``evaluate_df`` returns for the assembled DataFrame.
    """
    columns = {'y_true': y_true, 'y_prob': gezi.sigmoid(y_pred)}

    # (frame column, key in ``x``) -- only ``hist_len`` is renamed.
    feature_columns = (
        ('impression_id', 'impression_id'),
        ('uid', 'uid'),
        ('did', 'did'),
        ('history_len', 'hist_len'),
        ('uid_in_train', 'uid_in_train'),
        ('did_in_train', 'did_in_train'),
    )
    for column, key in feature_columns:
        columns[column] = x[key]

    frame = pd.DataFrame(columns)
    return evaluate_df(frame)
def valid_write(ids, labels, predicts, out):
    """Print one ``id,label,probability`` line per example to ``out``.

    Args:
        ids: Iterable of example identifiers.
        labels: Iterable of labels, aligned with ``ids``.
        predicts: Iterable of raw model outputs; each is passed through
            ``gezi.sigmoid`` and printed with three decimals.
        out: Writable file-like object receiving the lines.
    """
    row_format = '{},{},{:.3f}'
    for example_id, label, raw_score in zip(ids, labels, predicts):
        probability = gezi.sigmoid(raw_score)
        row = row_format.format(example_id, label, probability)
        print(row, file=out)
def evaluate(y, y_, model_path=None):
    """Compute AUC and log loss of sigmoid-transformed predictions.

    Args:
        y: Ground-truth labels.
        y_: Raw model outputs; converted with ``gezi.sigmoid``.
        model_path: Unused here; kept so the signature stays unchanged.

    Returns:
        A pair ``(values, names)`` where ``values`` is ``[auc, loss]`` and
        ``names`` is ``['auc', 'loss']``.
    """
    probabilities = gezi.sigmoid(y_)
    # Dict insertion order keeps values and names aligned.
    metrics = {
        'auc': roc_auc_score(y, probabilities),
        'loss': log_loss(y, probabilities),
    }
    return list(metrics.values()), list(metrics.keys())