def evaluate(labels, predicts, ids=None, model_path=None):
  # TODO: applying softmax here may cause problems... not correct for f1
  probs = gezi.softmax(predicts)
  #adjusted_probs = gezi.softmax(predicts * FLAGS.logits_factor) * class_weights * [1, 4., 5., 1.]
  adjusted_probs = gezi.softmax(predicts * FLAGS.logits_factor) * class_weights
  mean_vals = []
  mean_names = []
  #vals, names = calc_f1(labels, predicts, model_path)
  vals, names = calc_f1(labels, probs, model_path)
  mean_vals.append(vals[0])
  mean_names.append(names[0])
  vals = vals[1:]
  names = names[1:]
  vals_adjusted, names_adjusted = calc_f1(labels, adjusted_probs, model_path, name='adjusted_f1')
  mean_vals.append(vals_adjusted[0])
  mean_names.append(names_adjusted[0])
  vals += vals_adjusted[1:]
  names += names_adjusted[1:]
  vals_loss, names_loss = calc_loss(labels, probs, model_path)
  mean_vals.append(vals_loss[0])
  mean_names.append(names_loss[0])
  vals += vals_loss[1:]
  names += names_loss[1:]
  probs = predicts if not FLAGS.auc_need_softmax else probs
  vals_auc, names_auc = calc_auc(labels, probs, model_path)
  mean_vals.append(vals_auc[0])
  mean_names.append(names_auc[0])
  vals += vals_auc[1:]
  names += names_auc[1:]
  vals = mean_vals + vals
  names = mean_names + names
  return vals, names
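# Note (assumption): `gezi.softmax` is not defined in this file set. Everything here
# treats it as a plain numerically-stable softmax over a given axis, which matches the
# values quoted in later comments (softmax([1, 2]) = [0.26894142, 0.73105858]).
# A minimal sketch under that assumption -- not the original gezi implementation:
import numpy as np

def softmax_sketch(x, axis=-1):
  """Numerically stable softmax along `axis` (assumed behavior of gezi.softmax)."""
  x = np.asarray(x, dtype=np.float64)
  x = x - np.max(x, axis=axis, keepdims=True)  # shift for numerical stability
  e = np.exp(x)
  return e / np.sum(e, axis=axis, keepdims=True)

# softmax_sketch([1., 2.]) -> array([0.26894142, 0.73105858])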
def to_predict(logits, weights=None, is_single=False, adjust=True):
  logits = np.reshape(logits, [-1, num_attrs, num_classes])
  ## DO NOT divide!!
  if is_single:
    factor = FLAGS.logits_factor
  else:
    if weights is None:
      factor = 1.
    else:
      factor = FLAGS.logits_factor / weights
  #print('factor:', factor)
  if (adjust and FLAGS.adjust) or FLAGS.num_grids:
    logits = logits * factor
    probs = gezi.softmax(logits, -1)
    probs *= class_weights
  else:
    probs = logits
  probs = np.reshape(probs, [-1, num_classes])
  result = np.zeros([len(probs)], dtype=int)
  for i, prob in enumerate(probs):
    # # TODO try to calibrate to 0.5 ?
    # if prob[0] >= 0.6:
    #   result[i] = -2
    # else:
    #   result[i] = np.argmax(prob[1:]) - 1
    # this can also improve, but not as good as per-attr class weights adjust, which can get 7183
    # TODO class_weights right now still not the best!
    #prob[0] *= 0.4
    result[i] = np.argmax(prob) - 2
  result = np.reshape(result, [-1, num_attrs])
  return result
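# Usage sketch for `to_predict` above (shapes are illustrative assumptions): the model
# emits num_classes = 4 logits per attribute, and `np.argmax(prob) - 2` maps class
# indices {0, 1, 2, 3} to the dataset's label range {-2, -1, 0, 1}.
#
#   logits = np.random.randn(32, num_attrs * num_classes)  # hypothetical batch of 32
#   predicts = to_predict(logits, is_single=True)          # -> [32, num_attrs], values in [-2, 1]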
def evaluate(labels, predicts, ids=None, model_path=None):
  vals, names = calc_acc(labels, predicts, ids, model_path)
  probs = gezi.softmax(predicts)
  vals_loss, names_loss = calc_loss(labels, probs, ids, model_path)
  vals += vals_loss
  names += names_loss
  return vals, names
def evaluate(labels, logits, ids=None):
  logits = logits[:, :len(classes)]
  predicts = np.argmax(logits, -1)
  acc = np.mean(np.equal(predicts, labels))
  probs = gezi.softmax(logits)
  loss = log_loss(labels, probs)
  vals = [loss, acc]
  names = ['loss', 'acc']
  return vals, names
def evaluate(labels, logits, ids=None):
  logits = logits[:, :num_classes]
  predicts = np.argmax(logits, -1)
  acc = np.mean(np.equal(predicts, labels))
  probs = gezi.softmax(logits)
  loss = metrics.log_loss(labels, probs)
  kappa = metrics.cohen_kappa_score(labels, predicts)
  vals = [loss, acc, kappa]
  names = ['loss', 'acc', 'kappa']
  return vals, names
def to_predict(logits):
  logits = np.reshape(logits, [-1, num_attrs, 4])
  probs = gezi.softmax(logits, -1)
  probs = np.reshape(probs, [-1, 4])
  result = np.zeros([len(probs)], dtype=np.int32)
  for i, prob in enumerate(probs):
    if prob[0] >= 0.6:
      result[i] = -2
    else:
      result[i] = np.argmax(prob[1:]) - 1
  result = np.reshape(result, [-1, num_attrs])
  return result
def main(_):
  melt.apps.init()
  #ev.init()
  model = getattr(base, FLAGS.model)()
  model.debug = True
  melt.eager.restore(model)
  ids2text.init()
  vocab = ids2text.vocab

  # query = '阿里和腾讯谁更流氓'   # 'Which of Alibaba and Tencent is more rogue?'
  # passage = '腾讯比阿里流氓'     # 'Tencent is more rogue than Alibaba'
  # query = 'c罗和梅西谁踢球更好'  # 'Who plays better, Ronaldo or Messi?'
  # passage = '梅西比c罗踢的好'    # 'Messi plays better than Ronaldo'
  query = '青光眼遗传吗'  # 'Is glaucoma hereditary?'
  # 'Glaucoma has hereditary factors, so if your father has glaucoma we strongly suggest
  # you get an early screening at a hospital: check the eyes, especially the visual field
  # and the condition of the retina.'
  passage = '青光眼有遗传因素的,所以如果是您的父亲是青光眼的话,那我们在这里就强烈建议您,自己早期到医院里面去做一个筛查,测一下,看看眼,尤其是检查一下视野,然后视网膜的那个情况,都做一个早期的检查。'

  qids = text2ids(query)
  qwords = [vocab.key(qid) for qid in qids]
  print(qids)
  print(ids2text.ids2text(qids))
  pids = text2ids(passage)
  pwords = [vocab.key(pid) for pid in pids]
  print(pids)
  print(ids2text.ids2text(pids))

  x = {
      'query': [qids],
      'passage': [pids],
      'type': [0],
  }
  logits = model(x)[0]
  probs = gezi.softmax(logits)
  print(probs)
  print(list(zip(CLASSES, [x for x in probs])))
  predict = np.argmax(logits, -1)
  print('predict', predict, CLASSES[predict])

  # print word importance scores
  word_scores_list = model.pooling.word_scores
  for word_scores in word_scores_list:
    print(list(zip(pwords, word_scores[0].numpy())))
def score_ensemble(ensemble, label):
  blend_preds = np.zeros([len(scores_list[0]), NUM_CLASSES])
  for model in ensemble:
    scores = scores_list[model]
    scores = np.reshape(scores, [-1, NUM_ATTRIBUTES, NUM_CLASSES])
    scores = scores[:, label]
    blend_preds += scores
  predict = to_one_predict(blend_preds, label, len(ensemble))
  index = FLAGS.num_train
  # NOTE: `model` below is the last member of the loop above; this assumes all models
  # share the same labels for this attribute.
  score = f1_score(labels_list[model][:, label][:index], predict[:index], average='macro')
  probs = gezi.softmax(blend_preds / len(ensemble))
  loss = log_loss(labels_list[model][:, label][:index], probs[:index])
  valid_score = f1_score(labels_list[model][:, label][index:], predict[index:], average='macro')
  valid_loss = log_loss(labels_list[model][:, label][index:], probs[index:])
  return score, loss, valid_score, valid_loss
def write(id, label, predict, out, out2=None, is_infer=False):
  infos = valid_infos if not is_infer else test_infos
  info = infos[id]
  score = gezi.softmax(predict)
  predict = np.argmax(predict)
  candidates = info['candidates'].split('|')
  if label is not None:
    label = candidates[label]
  predict = candidates[predict]
  #.. on P40 not ok.. WHY ? FIXME
  # print(id, label, predict, score, gezi.csv(info['candidates']), info['type'], gezi.csv(info['query_str']), gezi.csv(info['passage_str']),
  #       gezi.csv(ids2text.ids2text(info['query'], sep='|')), gezi.csv(ids2text.ids2text(info['passage'], sep='|')), sep=',', file=out)
  # File "/home/slurm/job/tmp/job-58821/wenzheng/projects/ai2018/reader/evaluate.py", line 178, in write
  #   print(id, label, predict, score, gezi.csv(info['candidates']), info['type'], gezi.csv(info['query_str']), gezi.csv(info['passage_str']), sep=',', file=out)
  # UnicodeEncodeError: 'latin-1' codec can't encode characters in position 0-1: ordinal not in range(256)
  print(id, label, predict, score, gezi.csv(info['candidates']), info['type'],
        gezi.csv(info['query_str']), gezi.csv(info['passage_str']), sep=',', file=out)
  #print(id, label, predict, score, sep=',', file=out)
  if is_infer:
    # for contest
    print(id, predict, sep='\t', file=out2)
def main(_):
  melt.apps.init()
  #ev.init()
  model = getattr(base, FLAGS.model)()
  model.debug = True
  melt.eager.restore(model)
  ids2text.init()
  vocab = ids2text.vocab

  # successive overrides keep only the last content; these are alternative test sentences
  content = '这是一个很好的餐馆,菜很不好吃,我还想再去'    # 'A very good restaurant, the food is bad, I still want to go again'
  content = '这是一个很差的餐馆,菜很不好吃,我不想再去'    # 'A very bad restaurant, the food is bad, I do not want to go again'
  content = '这是一个很好的餐馆,菜很好吃,我还想再去'      # 'A very good restaurant, the food is good, I want to go again'
  content = '这是一个很好的餐馆,只是菜很难吃,我还想再去'   # 'A very good restaurant, only the food is awful, I still want to go again'
  content = '这是一个很好的餐馆,只是菜很不好吃,我还想再去'  # 'A very good restaurant, only the food is bad, I still want to go again'

  cids = text2ids(content)
  words = [vocab.key(cid) for cid in cids]
  print(cids)
  print(ids2text.ids2text(cids))
  x = {'content': [cids]}
  logits = model(x)[0]
  probs = gezi.softmax(logits, 1)
  print(probs)
  print(list(zip(ATTRIBUTES, [list(x) for x in probs])))
  predicts = np.argmax(logits, -1) - 2
  print('predicts ', predicts)
  print(list(zip(ATTRIBUTES, predicts)))
  adjusted_predicts = ev.to_predict(logits)
  print('apredicts', adjusted_predicts)
  print(list(zip(ATTRIBUTES, adjusted_predicts)))

  # print word importance scores
  word_scores_list = model.pooling.word_scores
  for word_scores in word_scores_list:
    print(list(zip(words, word_scores[0].numpy())))
def predict(content):
  x, words = convert(content)
  logits = model(x)[0]
  logits = logits.detach().cpu().numpy()
  probs = gezi.softmax(logits, 1)
  #print(probs)
  print(list(zip(ATTRIBUTES, [list(x) for x in probs])))
  predicts = np.argmax(logits, -1) - 2
  print('predicts ', predicts)
  print(list(zip(ATTRIBUTES, predicts)))
  # adjusted_predicts = ev.to_predict(logits)
  # print('apredicts', adjusted_predicts)
  # print(list(zip(ATTRIBUTES, adjusted_predicts)))
  alpha = model.pooling.poolings[0].alpha
  alpha = alpha.detach().cpu().numpy()
  for i, attr in enumerate(ATTRIBUTES):
    scores = alpha[0, i]
    print(attr, predicts[i])
    for word, score in zip(words, scores):
      print(word, score * len(words))
if len(sys.argv) > 2:
  file1 = sys.argv[1]
  file2 = sys.argv[2]
  df1 = pd.read_csv(file1)
  df1 = df1.sort_values('id')
  df2 = pd.read_csv(file2)
  df2 = df2.sort_values('id')
  scores1 = [gezi.str2scores(x) for x in df1['score'].values]
  scores2 = [gezi.str2scores(x) for x in df2['score'].values]
  scores1 = np.reshape(scores1, [-1, len(ATTRIBUTES), 4])
  scores1 = gezi.softmax(scores1)
  scores2 = np.reshape(scores2, [-1, len(ATTRIBUTES), 4])
  scores2 = gezi.softmax(scores2)
  ndf1 = pd.DataFrame()
  ndf2 = pd.DataFrame()
  for i, attr in enumerate(ATTRIBUTES):
    score1 = np.reshape(scores1[:, i, :], [-1])
    score2 = np.reshape(scores2[:, i, :], [-1])
    ndf1[attr] = score1
    ndf2[attr] = score2
    print('Attr:----------------------------------------------------%s' % attr)
    # completed truncated call (assumed to correlate the two models' per-attr probs)
    print(' Pearson\'s correlation score: %0.6f' % ndf1[attr].corr(ndf2[attr]))
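# Note (assumption): `gezi.str2scores` (like the `parse` helper used in the ensemble
# scripts) is assumed to deserialize the 'score' CSV column -- a bracketed, space or
# comma separated list of floats -- back into a float list. A rough stand-in under
# that assumed format, not the original implementation:
def str2scores_sketch(s):
  return [float(x) for x in s.strip('[]').replace(',', ' ').split()]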
def main(_):
  print('METHOD:', FLAGS.method)
  print('Norm factor:', FLAGS.norm_factor)

  if FLAGS.num_grids <= 1:
    FLAGS.grid_search = False
  # if FLAGS.grid_search:
  #   FLAGS.debug = False
  DEBUG = FLAGS.debug
  idir = FLAGS.idir

  # first column is id, second is content ..
  idx = 2

  valid_files = glob.glob(f'{idir}/*.valid.csv')
  valid_files = [x for x in valid_files if not 'ensemble' in x]
  if not DEBUG:
    print('VALID then INFER')
    infer_files = glob.glob(f'{idir}/*.infer.csv.debug')
  else:
    print('Debug mode INFER will write result using valid ids, just for test')
    infer_files = glob.glob(f'{idir}/*.valid.csv')
    infer_files = [x for x in infer_files if not 'ensemble' in x]
  print('num_ensembles', len(valid_files), 'num_infers', len(infer_files))
  assert len(valid_files) == len(infer_files), infer_files

  global num_ensembles
  num_ensembles = len(valid_files)
  # need global ? even if only read?
  global class_weights
  #print('-----------', class_weights)

  kf = KFold(n_splits=FLAGS.num_folds, shuffle=True, random_state=FLAGS.seed)

  print('loading all valid csv')
  dfs = []
  for file_ in tqdm(valid_files, ascii=True):
    df = pd.read_csv(file_)
    df = df.sort_values('id')
    dfs.append(df)

  logits_f1_list = []
  logits_adjusted_f1_list = []
  probs_f1_list = []
  probs_adjusted_f1_list = []
  grids_logits_adjusted_f1_list = []

  logits_predict_list = []
  logits_adjusted_predict_list = []
  probs_predict_list = []
  probs_adjusted_predict_list = []
  grids_logits_adjusted_predict_list = []

  labels_list = []
  results_list = []

  def split_train_valid(x):
    if FLAGS.num_folds == 1:
      return x, x
    else:
      total = 15000
      assert total % FLAGS.num_folds == 0
      num_valid = int(total / FLAGS.num_folds)
      num_train = total - num_valid
      return x[:num_train], x[num_train:]

  for fold, (train_index, valid_index) in enumerate(kf.split(dfs[0])):
    print('FOLD_%s---------------------------' % fold)
    print(train_index, valid_index)
    class_factors = np.ones([num_attrs, num_classes])
    class_weights = ori_class_weights
    # logits sum results
    results = None
    # prob sum results
    results2 = None
    weights = []
    scores_list = []
    for fid, df in enumerate(dfs):
      file_ = valid_files[fid]
      train = df.iloc[train_index]
      valid = df.iloc[valid_index]
      #if fid == 0:
      train_labels = train.iloc[:, idx:idx + num_attrs].values
      valid_labels = valid.iloc[:, idx:idx + num_attrs].values
      labels = np.concatenate([train_labels, valid_labels], 0)
      train_predicts = train.iloc[:, idx + num_attrs:idx + 2 * num_attrs].values
      valid_predicts = valid.iloc[:, idx + num_attrs:idx + 2 * num_attrs].values
      predicts = np.concatenate([train_predicts, valid_predicts], 0)
      train_scores = train['score']
      valid_scores = valid['score']
      scores = np.concatenate([train_scores, valid_scores], 0)
      scores = [parse(score) for score in scores]
      scores = np.array(scores)
      scores_list.append(scores)
      train_labels, valid_labels = split_train_valid(labels)
      train_predicts, valid_predicts = split_train_valid(predicts)
      train_scores, valid_scores = split_train_valid(scores)
      f1s = calc_f1s(train_labels, train_predicts)
      f1s_adjusted = calc_f1s(train_labels, to_predict(train_scores, is_single=True))
      train_probs = gezi.softmax(train_scores.reshape([-1, NUM_ATTRIBUTES, NUM_CLASSES]))
      aucs = calc_aucs(train_labels + 2, train_probs)
      losses = calc_losses(train_labels + 2, train_probs)
      f1 = np.mean(f1s)
      f1_adjusted = np.mean(f1s_adjusted)
      print('%-3d' % fid, '%-100s' % file_, '%.5f' % f1, '%.5f' % f1_adjusted,
            '%.5f' % np.mean(aucs), '%.5f' % np.mean(losses))
      if FLAGS.weight_by == 'loss':
        weight = np.reshape(1 / losses, [num_attrs, 1])
      elif FLAGS.weight_by == 'auc':
        weight = np.reshape(aucs, [num_attrs, 1])
      else:
        weight = np.reshape(f1s_adjusted, [num_attrs, 1])
      weights.append(weight)

    weights = np.array(weights)
    scores_list = np.array(scores_list)
    blend_weights(weights, FLAGS.norm_factor)
    # if DEBUG:
    #   print(weights)

    print('-----------calc weight and score')
    for fid in tqdm(range(len(valid_files)), ascii=True):
      scores = scores_list[fid]
      if results is None:
        results = np.zeros([len(scores), num_attrs * num_classes])
        results2 = np.zeros([len(scores), num_attrs * num_classes])
      weight = weights[fid]
      if FLAGS.method == 'avg' or FLAGS.method == 'mean':
        weight = 1.
      for i, score in enumerate(scores):
        score = np.reshape(score, [num_attrs, num_classes]) * weight
        score = np.reshape(score, [-1])
        results[i] += score
        # notice softmax([1,2]) = [0.26894142, 0.73105858], softmax([2,4]) = [0.11920292, 0.88079708]
        score = np.reshape(score, [num_attrs, num_classes])
        # this does not work because already multiplied by weight..
        #score *= FLAGS.logits_factor
        score = gezi.softmax(score, -1)
        #score *= class_weights
        score = np.reshape(score, [-1])
        results2[i] += score

    sum_weights = np.sum(weights, 0)
    train_results, valid_results = split_train_valid(results)
    train_results2, valid_results2 = split_train_valid(results2)

    print('-----------using prob ensemble')
    adjusted_predict_prob = to_predict(valid_results2, sum_weights, adjust=False)
    adjusted_f1_prob = calc_f1(valid_labels, adjusted_predict_prob)
    valid_results2 = np.reshape(valid_results2, [-1, num_attrs, num_classes])
    predicts2 = np.argmax(valid_results2, -1) - 2
    f1_prob = calc_f1(valid_labels, predicts2)
    probs_f1_list.append(f1_prob)
    probs_adjusted_f1_list.append(adjusted_f1_prob)
    probs_predict_list.append(predicts2)
    probs_adjusted_predict_list.append(adjusted_predict_prob)
    print('%-40s' % 'f1_prob:', '%.5f' % f1_prob)
    print('%-40s' % 'adjusted f1_prob:', '%.5f' % adjusted_f1_prob)
    # print('-----------detailed f1 infos (ensemble by prob)')
    # _, adjusted_f1_probs, class_f1s = calc_f1_alls(valid_labels, to_predict(results2[num_train:], sum_weights, adjust=False))
    # for i, attr in enumerate(ATTRIBUTES):
    #   print(attr, adjusted_f1_probs[i])
    # for i, cls in enumerate(CLASSES):
    #   print(cls, class_f1s[i])

    print('-----------using logits ensemble')
    adjusted_predict = to_predict(valid_results, sum_weights)
    adjusted_f1 = calc_f1(valid_labels, adjusted_predict)
    valid_results = np.reshape(valid_results, [-1, num_attrs, num_classes])
    predicts = np.argmax(valid_results, -1) - 2
    f1 = calc_f1(valid_labels, predicts)
    logits_f1_list.append(f1)
    logits_adjusted_f1_list.append(adjusted_f1)
    logits_predict_list.append(predicts)
    logits_adjusted_predict_list.append(adjusted_predict)
    results_list.append(valid_results)
    labels_list.append(valid_labels)
    print('%-40s' % 'f1:', '%.5f' % f1)
    print('%-40s' % 'adjusted f1:', '%.5f' % adjusted_f1)
    if FLAGS.show_detail:
      print('-----------detailed f1 infos (ensemble by logits)')
      _, adjusted_f1s, class_f1s = calc_f1_alls(valid_labels, to_predict(valid_results, sum_weights))
      for i, attr in enumerate(ATTRIBUTES):
        print('%-40s' % attr, '%.5f' % adjusted_f1s[i])
      for i, cls in enumerate(CLASSES):
        print('%-40s' % cls, '%.5f' % class_f1s[i])
      print('%-40s' % 'f1:', '%.5f' % f1)
      print('%-40s' % 'f1 prob:', '%.5f' % f1_prob)
      print('%-40s' % 'adjusted f1 prob:', '%.5f' % adjusted_f1_prob)
      print('%-40s' % 'adjusted f1:', '%.5f' % adjusted_f1)

    if FLAGS.grid_search:
      print('------------grid search num_grids', FLAGS.num_grids)
      class_factors = grid_search_class_factors(
          gezi.softmax(np.reshape(train_results, [-1, num_attrs, num_classes]) * (FLAGS.logits_factor / sum_weights)),
          train_labels, class_weights, num_grids=FLAGS.num_grids)
      if FLAGS.show_detail:
        print('class_factors1 with num_grids', FLAGS.num_grids)
        print(class_factors)
      # adjust class weights to get a better result from grid search
      class_weights = class_weights * class_factors
      adjusted_f1_before_grids = adjusted_f1
      print('after dynamic adjust class factors')
      adjusted_predict = to_predict(valid_results, sum_weights)
      adjusted_f1 = calc_f1(valid_labels, adjusted_predict)
      valid_results = np.reshape(valid_results, [-1, num_attrs, num_classes])
      grids_logits_adjusted_f1_list.append(adjusted_f1)
      grids_logits_adjusted_predict_list.append(adjusted_predict)
      print('-----------using logits ensemble')
      print('%-40s' % 'adjusted f1 before grids:', '%.5f' % adjusted_f1_before_grids)
      print('%-40s' % 'adjusted f1:', '%.5f' % adjusted_f1)
      if FLAGS.show_detail:
        print('-----------detailed f1 infos (ensemble by logits)')
        _, adjusted_f1s, class_f1s = calc_f1_alls(valid_labels, to_predict(valid_results, sum_weights))
        for i, attr in enumerate(ATTRIBUTES):
          print('%-40s' % attr, '%.5f' % adjusted_f1s[i])
        for i, cls in enumerate(CLASSES):
          print('%-40s' % cls, '%.5f' % class_f1s[i])
        print('%-40s' % 'adjusted f1 before grids:', '%.5f' % adjusted_f1_before_grids)
        print('%-40s' % 'adjusted f1:', '%.5f' % adjusted_f1)

  print('-------------------------------------OVERALL mean')
  print('ensemble by probs')
  print('%-40s' % 'f1', '%.5f' % np.mean(probs_f1_list))
  print('%-40s' % 'adjusted f1', '%.5f' % np.mean(probs_adjusted_f1_list))
  print('ensemble by logits')
  print('%-40s' % 'f1:', '%.5f' % np.mean(logits_f1_list))
  print('%-40s' % 'adjusted f1:', '%.5f' % np.mean(logits_adjusted_f1_list))
  if FLAGS.grid_search:
    print('ensemble by logits after grid search')
    print('%-40s' % 'adjusted f1', '%.5f' % np.mean(grids_logits_adjusted_f1_list))

  print('-------------------------------------OVERALL recalc')
  labels = np.concatenate(labels_list, 0)
  print('ensemble by probs')
  print('%-40s' % 'f1', '%.5f' % calc_f1(labels, np.concatenate(probs_predict_list, 0)))
  print('%-40s' % 'adjusted f1', '%.5f' % calc_f1(labels, np.concatenate(probs_adjusted_predict_list, 0)))
  print('ensemble by logits')
  print('%-40s' % 'f1:', '%.5f' % calc_f1(labels, np.concatenate(logits_predict_list, 0)))
  print('%-40s' % 'adjusted f1:', '%.5f' % calc_f1(labels, np.concatenate(logits_adjusted_predict_list, 0)))
  if FLAGS.grid_search:
    print('ensemble by logits after grid search')
    print('%-40s' % 'adjusted f1 after grid search', '%.5f' % calc_f1(labels, np.concatenate(grids_logits_adjusted_predict_list, 0)))

  _, adjusted_f1s, class_f1s = calc_f1_alls(labels, np.concatenate(logits_adjusted_predict_list, 0))
  for i, attr in enumerate(ATTRIBUTES):
    print('%-40s' % attr, '%.5f' % adjusted_f1s[i])
  for i, cls in enumerate(CLASSES):
    print('%-40s' % cls, '%.5f' % class_f1s[i])
  print('%-40s' % 'f1', '%.5f' % calc_f1(labels, np.concatenate(logits_predict_list, 0)))
  print('%-40s' % 'adjusted f1', '%.5f' % calc_f1(labels, np.concatenate(logits_adjusted_predict_list, 0)))
  if FLAGS.grid_search:
    print('%-40s' % 'adjusted f1 after grid search', '%.5f' % calc_f1(labels, np.concatenate(grids_logits_adjusted_predict_list, 0)))

  results = np.concatenate(results_list, 0)
  results = results.reshape([-1, NUM_ATTRIBUTES, NUM_CLASSES])
  #factor = FLAGS.logits_factor / sum_weights
  #print('%-40s' % '* factor loss', '%.5f' % calc_loss(labels, gezi.softmax(results * factor)))
  print('%-40s' % 'loss', '%.5f' % calc_loss(labels, gezi.softmax(results)))
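# Note (assumption): `blend_weights` is not shown in this file set. From its call sites
# (per-model, per-attribute weights sharpened by FLAGS.norm_factor, and a later comment
# that the summed weights equal 1), a plausible stand-in is power-then-normalize over
# the model axis. This is a guess at the interface, not the original code:
def blend_weights_sketch(weights, norm_factor):
  """weights: [num_models, num_attrs, 1]; sharpen, then normalize per attribute to sum to 1."""
  w = np.power(weights, norm_factor)
  w /= np.sum(w, axis=0, keepdims=True)
  weights[:] = w  # in place, matching call sites that ignore any return value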
def main(_):
  print('METHOD:', FLAGS.method)
  print('Norm factor:', FLAGS.norm_factor)

  # if FLAGS.num_grids:
  #   FLAGS.debug = False
  DEBUG = FLAGS.debug
  idir = FLAGS.idir

  # first column is id, second is content ..
  idx = 2

  # logits sum results
  results = None
  # prob sum results
  results2 = None

  valid_files = glob.glob(f'{idir}/*.valid.csv')
  valid_files = [x for x in valid_files if not 'ensemble' in x]
  if not DEBUG:
    print('VALID then INFER')
    infer_files = glob.glob(f'{idir}/*.infer.csv.debug')
  else:
    print('Debug mode INFER will write result using valid ids, just for test')
    infer_files = glob.glob(f'{idir}/*.valid.csv')
    infer_files = [x for x in infer_files if not 'ensemble' in x]
  print('num_ensembles', len(valid_files))
  print('num_infers', len(infer_files))
  if not FLAGS.debug:
    assert len(valid_files) == len(infer_files), infer_files

  global num_ensembles
  num_ensembles = len(valid_files)
  # need global ? even if only read?
  global class_weights
  #print('-----------', class_weights)

  # weights holds one per-attribute weight vector per model
  weights = []
  scores_list = []
  valid_files_ = []
  for fid, file_ in enumerate(valid_files):
    df = pd.read_csv(file_)
    df = df.sort_values('id')
    labels = df.iloc[:, idx:idx + num_attrs].values
    predicts = df.iloc[:, idx + num_attrs:idx + 2 * num_attrs].values
    scores = df['score']
    scores = [parse(score) for score in scores]
    scores = np.array(scores)
    scores_list.append(scores)
    #f1 = calc_f1(labels, predicts)
    #f1 = calc_f1(labels, to_predict(scores))
    #f1s = calc_f1s(labels, predicts)
    ## to_predict better
    # f1_file = gezi.strip_suffix(file_, '.valid.csv') + '.f1s.npy'
    # f1_adjusted_file = gezi.strip_suffix(file_, '.valid.csv') + '.f1s.adjust.npy'
    # if not os.path.exists(f1_file):
    f1s = calc_f1s(labels, predicts)
    f1s_adjusted = calc_f1s(labels, to_predict(scores, is_single=True))
    probs = gezi.softmax(scores.reshape([-1, NUM_ATTRIBUTES, NUM_CLASSES]))
    aucs = calc_aucs(labels + 2, probs)
    losses = calc_losses(labels + 2, probs)
    # np.save(f1_file, f1s)
    # np.save(f1_adjusted_file, f1s_adjusted)
    # else:
    #   f1s = np.load(f1_file)
    #   f1s_adjusted = np.load(f1_adjusted_file)
    f1 = np.mean(f1s)
    f1_adjusted = np.mean(f1s_adjusted)
    print(fid, file_, f1, f1_adjusted, np.mean(aucs), np.mean(losses))
    if f1_adjusted < FLAGS.thre:
      print('ignore', file_)
      continue
    else:
      valid_files_.append(file_)
    # NOTICE weighted can get 7186 while avg only 716
    # and using original f1s score higher
    #weight = np.reshape(f1s, [num_attrs, 1])
    #weight = np.reshape(f1s_adjusted, [num_attrs, 1])
    #weight = np.reshape(aucs, [num_attrs, 1])
    if FLAGS.weight_by == 'loss':
      weight = np.reshape(1 / losses, [num_attrs, 1])
    elif FLAGS.weight_by == 'auc':
      weight = np.reshape(aucs, [num_attrs, 1])
    else:
      weight = np.reshape(f1s_adjusted, [num_attrs, 1])
    weights.append(weight)

  weights = np.array(weights)
  scores_list = np.array(scores_list)
  blend_weights(weights, FLAGS.norm_factor)
  # if DEBUG:
  #   print(weights)

  valid_files = valid_files_
  print('final num valid files', len(valid_files))

  for fid in tqdm(range(len(valid_files)), ascii=True):
    scores = scores_list[fid]
    if results is None:
      results = np.zeros([len(scores), num_attrs * num_classes])
      results2 = np.zeros([len(scores), num_attrs * num_classes])
    weight = weights[fid]
    if FLAGS.method == 'avg' or FLAGS.method == 'mean':
      weight = 1.
    for i, score in enumerate(scores):
      score = np.reshape(score, [num_attrs, num_classes]) * weight
      score = np.reshape(score, [-1])
      results[i] += score
      # notice softmax([1,2]) = [0.26894142, 0.73105858], softmax([2,4]) = [0.11920292, 0.88079708]
      score = np.reshape(score, [num_attrs, num_classes])
      # this does not work because already multiplied by weight..
      #score *= FLAGS.logits_factor
      score = gezi.softmax(score, -1)
      #score *= class_weights
      score = np.reshape(score, [-1])
      results2[i] += score

  sum_weights = np.sum(weights, 0)
  adjusted_f1 = calc_f1(labels, to_predict(results, sum_weights))
  results = np.reshape(results, [-1, num_attrs, num_classes])
  predicts = np.argmax(results, -1) - 2
  f1 = calc_f1(labels, predicts)
  print('-----------using logits ensemble')
  print('f1:', f1)
  print('adjusted f1:', adjusted_f1)

  adjusted_f1_prob = calc_f1(labels, to_predict(results2, sum_weights, adjust=False))
  results2 = np.reshape(results2, [-1, num_attrs, num_classes])
  predicts2 = np.argmax(results2, -1) - 2
  f1_prob = calc_f1(labels, predicts2)
  print('-----------using prob ensemble')
  print('f1_prob:', f1_prob)
  print('adjusted f1_prob:', adjusted_f1_prob)

  print('-----------detailed f1 infos (ensemble by prob)')
  _, adjusted_f1_probs, class_f1s = calc_f1_alls(labels, to_predict(results2, sum_weights, adjust=False))
  for i, attr in enumerate(ATTRIBUTES):
    print(attr, adjusted_f1_probs[i])
  for i, cls in enumerate(CLASSES):
    print(cls, class_f1s[i])

  print('-----------detailed f1 infos (ensemble by logits)')
  _, adjusted_f1s, class_f1s = calc_f1_alls(labels, to_predict(results, sum_weights))
  for i, attr in enumerate(ATTRIBUTES):
    print(attr, adjusted_f1s[i])
  for i, cls in enumerate(CLASSES):
    print(cls, class_f1s[i])

  print(f'adjusted f1_prob:[{adjusted_f1_prob}]')
  print(f'adjusted f1:[{adjusted_f1}]')

  loss = calc_loss(labels, gezi.softmax(results.reshape([-1, NUM_ATTRIBUTES, NUM_CLASSES])))
  print(f'loss:[{loss}]')

  class_factors = np.ones([num_attrs, num_classes])
  if FLAGS.num_grids:
    class_factors = grid_search_class_factors(
        gezi.softmax(np.reshape(results, [-1, num_attrs, num_classes]) * (FLAGS.logits_factor / sum_weights)),
        labels, class_weights, num_grids=FLAGS.num_grids)
    print('class_factors')
    print(class_factors)
    # adjust class weights to get a better result from grid search
    class_weights = class_weights * class_factors
    print('after dynamic adjust class factors')
    adjusted_f1 = calc_f1(labels, to_predict(results, sum_weights))
    results = np.reshape(results, [-1, num_attrs, num_classes])
    #predicts = np.argmax(results, -1) - 2
    #f1 = calc_f1(labels, predicts)
    print('-----------using logits ensemble')
    #print('f1:', f1)
    print('adjusted f1:', adjusted_f1)
    print('-----------detailed f1 infos (ensemble by logits)')
    _, adjusted_f1s, class_f1s = calc_f1_alls(labels, to_predict(results, sum_weights))
    for i, attr in enumerate(ATTRIBUTES):
      print(attr, adjusted_f1s[i])
    for i, cls in enumerate(CLASSES):
      print(cls, class_f1s[i])
    # print(f'adjusted f1_prob:[{adjusted_f1_prob}]')
    # print(f'adjusted f1:[{adjusted_f1}]')

  #-------------infer
  print('------------infer')
  ofile = os.path.join(idir, 'ensemble.infer.csv')
  file_ = gezi.strip_suffix(file_, '.debug')
  df = pd.read_csv(file_)
  idx = 2
  results = None
  results2 = None
  for fid, file_ in enumerate(infer_files):
    df = pd.read_csv(file_)
    df = df.sort_values('id')
    print(fid, file_, len(df))
    if not FLAGS.debug:
      assert len(df) == 200000
    if results is None:
      results = np.zeros([len(df), num_attrs * num_classes])
      results2 = np.zeros([len(df), num_attrs * num_classes])
    scores = df['score']
    scores = [parse(score) for score in scores]
    scores = np.array(scores)
    weight = weights[fid]
    # fixed: original had `and`, which can never be true
    if FLAGS.method == 'avg' or FLAGS.method == 'mean':
      weight = 1.
    for i, score in enumerate(scores):
      score = np.reshape(np.reshape(score, [num_attrs, num_classes]) * weight, [-1])
      results[i] += score
      score = gezi.softmax(np.reshape(score, [num_attrs, num_classes]), -1)
      score = np.reshape(score, [-1])
      results2[i] += score

  #predicts = to_predict(results2, sum_weights)
  predicts = to_predict(results, sum_weights)

  if not DEBUG:
    columns = df.columns[idx:idx + num_attrs].values
  else:
    columns = df.columns[idx + num_attrs:idx + 2 * num_attrs].values
  if not DEBUG:
    ofile = os.path.join(idir, 'ensemble.infer.csv')
  else:
    ofile = os.path.join(idir, 'ensemble.valid.csv')
  if not DEBUG:
    file_ = gezi.strip_suffix(file_, '.debug')
    print('temp csv used for write', file_)
    df = pd.read_csv(file_)
  else:
    print('debug test using file', valid_files[-1])
    df = pd.read_csv(valid_files[-1])
  # to be safe, must sort by id
  df = df.sort_values('id')

  # TODO better ? not using loop ?
  for i, column in enumerate(columns):
    df[column] = predicts[:, i]
  if DEBUG:
    print('check blend result', calc_f1(df.iloc[:, idx:idx + num_attrs].values, predicts))
  print(f'adjusted f1_prob:[{adjusted_f1_prob}]')
  print(f'adjusted f1:[{adjusted_f1}]')
  print(f'loss:[{loss}]')
  print('out:', ofile)
  if not DEBUG:
    df.to_csv(ofile, index=False, encoding="utf_8_sig")

  print('---------------results', results.shape)
  df['score'] = [x for x in results]
  factor = FLAGS.logits_factor / sum_weights
  #print('--------sum_weights', sum_weights)
  #print('--------factor', factor)
  logits = np.reshape(results, [-1, num_attrs, num_classes])
  # DO NOT USE *= ... it would modify results...
  logits = logits * factor
  probs = gezi.softmax(logits, -1)
  probs *= class_weights
  logits = np.reshape(logits, [-1, num_attrs * num_classes])
  print('---------------logits', logits.shape)
  print('----results', results)
  print('----logits', logits)
  #df['logit'] = [x for x in logits]
  probs = np.reshape(probs, [-1, num_attrs * num_classes])
  print('---------------probs', probs.shape)
  #df['prob'] = [x for x in probs]
  if not DEBUG:
    ofile = os.path.join(idir, 'ensemble.infer.debug.csv')
  else:
    ofile = os.path.join(idir, 'ensemble.valid.csv')
  print('out debug:', ofile)
  df.to_csv(ofile, index=False, encoding="utf_8_sig")
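# Worked example of why the ensembles above keep both a logits sum (`results`) and a prob
# sum (`results2`): softmax is scale sensitive, so averaging logits and averaging
# probabilities blend differently. Using the numbers from the inline comment:
#   softmax([1, 2]) = [0.26894142, 0.73105858]
#   softmax([2, 4]) = [0.11920292, 0.88079708]
#   logits ensemble: softmax(mean of [1,2] and [2,4]) = softmax([1.5, 3.0]) = [0.18242552, 0.81757448]
#   prob   ensemble: mean of the two prob rows                              = [0.19407217, 0.80592783]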
def build_features(index):
  mode = get_mode(FLAGS.input)
  start_index = FLAGS.start_index
  out_file = os.path.dirname(FLAGS.vocab_) + '/{0}/{1}.record'.format(mode, index + start_index)
  os.system('mkdir -p %s' % os.path.dirname(out_file))
  print('---out_file', out_file)
  # TODO now only gen one tfrecord file

  total = len(df)
  num_records = FLAGS.num_records_
  ## TODO FIXME why is this still None here? FLAGS.num_records has been modified before in main as 7 ...
  #print('---------', num_records, FLAGS.num_records_)
  if not num_records:
    if mode.split('.')[-1] in ['valid', 'test', 'dev', 'pm'] or 'valid' in FLAGS.input:
      num_records = 1
    else:
      num_records = 7
  #print('------------------', num_records, FLAGS.num_records_)
  start, end = gezi.get_fold(total, num_records, index)

  print('total', total, 'infile', FLAGS.input, 'out_file', out_file)

  max_len = 0
  max_num_ids = 0
  num = 0
  with melt.tfrecords.Writer(out_file) as writer:
    for i in tqdm(range(start, end), ascii=True):
      try:
        row = df.iloc[i]
        id = str(row[0])

        if seg_result:
          if id not in seg_result:
            print('id %s not found in seg_result' % id)
            continue
          words = seg_result[id]
          if FLAGS.content_limit_:
            # NOW only for bert!
            if len(words) + 2 > FLAGS.content_limit_:
              words = words[:FLAGS.content_limit_ - 3 - 50] + ['[MASK]'] + words[-50:]
            #print(words)
          if FLAGS.add_start_end_:
            words = gezi.add_start_end(words, FLAGS.start_mark, FLAGS.end_mark)
        if pos_result:
          pos = pos_result[id]
          if FLAGS.add_start_end_:
            pos = gezi.add_start_end(pos)
        if ner_result:
          ner = ner_result[id]
          if FLAGS.add_start_end_:
            ner = gezi.add_start_end(ner)

        if start_index > 0:
          id = 't' + id  # fixed: original had `id == 't' + id`, a no-op comparison

        content = row[1]
        content_ori = content
        content = filter.filter(content)

        if not FLAGS.use_soft_label_:
          if 'test' in mode:
            label = [-2] * 20
          else:
            label = list(row[2:])
          #label = [x + 2 for x in label]
          #num_labels = len(label)
        else:
          label = [0.] * 80
          if not FLAGS.is_soft_label:
            for idx, val in enumerate(row[2:]):
              label[idx * 4 + val] = 1.
          else:
            logits = np.array(gezi.str2scores(row['score']))
            logits = np.reshape(logits, [20, 4])
            probs = gezi.softmax(logits)
            label = list(np.reshape(probs, [-1]))

        if not seg_result:
          content_ids, words = text2ids_(content, preprocess=False, return_words=True)
          assert len(content_ids) == len(words)
        else:
          content_ids = [vocab.id(x) for x in words]
          #print(words, content_ids)
          #exit(0)

        if len(content_ids) > max_len:
          max_len = len(content_ids)
          print('max_len', max_len)

        # fixed: original used `and`, which can never be true; log abnormal lengths
        if len(content_ids) > FLAGS.word_limit or len(content_ids) < 5:
          print('{} {} {}'.format(id, len(content_ids), content_ori))
        #if len(content_ids) > FLAGS.word_limit:
        #  print(id, content)
        #  if mode not in ['test', 'valid']:
        #    continue
        #if len(content_ids) < 5 and mode not in ['test', 'valid']:
        #  continue

        content_ids = content_ids[:FLAGS.word_limit]
        words = words[:FLAGS.word_limit]

        # NOTICE different from tf, pytorch does not allow an all-0 seq for rnn if using padding mode
        if FLAGS.use_char:
          chars = [list(word) for word in words]
          char_ids = np.zeros([len(content_ids), FLAGS.char_limit], dtype=np.int32)
          vocab_ = char_vocab if char_vocab else vocab
          for i, token in enumerate(chars):
            for j, ch in enumerate(token):
              if j == FLAGS.char_limit:
                break
              char_ids[i, j] = vocab_.id(ch)
          char_ids = list(char_ids.reshape(-1))
          if np.sum(char_ids) == 0:
            print('------------------------bad id', id)
            print(content_ids)
            print(words)
            exit(0)
        else:
          char_ids = [0]

        if pos_vocab:
          assert pos
          pos = pos[:FLAGS.word_limit]
          pos_ids = [pos_vocab.id(x) for x in pos]
        else:
          pos_ids = [0]

        if ner_vocab:
          assert ner
          if pos_vocab:
            assert len(pos) == len(ner)
          ner = ner[:FLAGS.word_limit]
          ner_ids = [ner_vocab.id(x) for x in ner]
        else:
          ner_ids = [0]

        wlen = [len(word) for word in words]

        feature = {
            'id': melt.bytes_feature(id),
            'content': melt.int64_feature(content_ids),
            'content_str': melt.bytes_feature(content_ori),
            'char': melt.int64_feature(char_ids),
            'pos': melt.int64_feature(pos_ids),  # might also be position info for mix seg
            'ner': melt.int64_feature(ner_ids),
            'wlen': melt.int64_feature(wlen),
            'source': melt.bytes_feature(mode),
        }
        feature['label'] = melt.int64_feature(label) if not FLAGS.use_soft_label_ else melt.float_feature(label)

        # TODO currently we do not get exact info on whether 1 image or 3 are shown ...
        record = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(record)
        num += 1
        global counter
        with counter.get_lock():
          counter.value += 1
        global total_words
        with total_words.get_lock():
          total_words.value += len(content_ids)
      except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        pass
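# Note (assumption): `gezi.get_fold(total, num_records, index)` is assumed to return the
# [start, end) row range that shard `index` of `num_records` shards should write. A rough
# stand-in under that assumption -- the real gezi helper may balance folds differently:
def get_fold_sketch(total, num_folds, index):
  fold_size = (total + num_folds - 1) // num_folds  # ceil division
  start = fold_size * index
  end = min(start + fold_size, total)
  return start, end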
if type1_score < 0.5:
  ignore_type1s.add(i)
  print('ignore type1 for', file_)

# only use df0
ids = df0['id'].values
scores = df0['score']
scores = [parse(score) for score in scores]
for id, score in zip(ids, scores):
  if id not in results:
    results[id] = score
  else:
    results[id] += score
scores2 = gezi.softmax(scores, -1)
for id, score in zip(ids, scores2):
  if id not in results2:
    results2[id] = score
  else:
    results2[id] += score

match = 0
for id, score in results.items():
  index = np.argmax(score, -1)
  #print(id, score, index)
  predict = candidates[id].split('|')[index]
  if predict == m[id]:
    match += 1
#if name.startswith('others_willing_to_consume_again_'):
if name.startswith(attr):
  for j in range(num_classes):
    df2[f'{name}_{j}'] = scores[:, i, j]
#print(df2)

i = ATTRIBUTES.index(attr)
idx = 1
x = df2.iloc[:, idx + num_attrs:].values
steps = range(10)

probs = gezi.softmax(x * 10)

weights = np.load('./mount/temp/ai2018/sentiment/class_weights.npy')
weights = weights[i]
weights = weights * weights * weights
index = np.argsort(-weights)
print(weights)
print(index)

kf = KFold(n_splits=2, shuffle=True)
kf.get_n_splits(x)

from sklearn.model_selection import train_test_split
def main(_):
  DEBUG = FLAGS.debug
  idir = FLAGS.idir

  print('METHOD:', FLAGS.method)

  candidates = {}
  m = {}
  results = {}
  results1 = {}
  results2 = {}

  valid_files = glob.glob(f'{idir}/*.valid.csv')
  if not DEBUG:
    print('VALID then INFER')
    infer_files = glob.glob(f'{idir}/*.infer.txt.debug')
  else:
    print('Debug mode INFER will write result using valid ids, just for test')
    infer_files = glob.glob(f'{idir}/*.valid.csv')
  print('num_ensembles', len(valid_files))
  print('num_infers', len(infer_files))
  assert len(valid_files) == len(infer_files), f'{len(valid_files)} {len(infer_files)}'
  num_ensembles = len(valid_files)

  wether_ids = None
  gdf = None

  weights = []
  weights_if = []
  weights_wether = []
  scores_list = []

  def get_weight(id, weight, weight_if, weight_wether):
    if FLAGS.method == 'avg' or FLAGS.method == 'mean':
      return 1.
    if not FLAGS.use_type:
      weight_ = weight
    else:
      if id in wether_ids:
        weight_ = weight_wether
      else:
        weight_ = weight_if
        # well we use avg mean for wether_if...
        #weight_ = 1.
    return weight_

  for i, file_ in enumerate(valid_files):
    df = pd.read_csv(file_)
    df = df.sort_values('id')
    ids = df['id'].values
    labels = df['label'].values
    if i == 0:
      gdf = df
    candidates_ = df['candidates'].values
    predicts = df['predict'].values
    scores = df['score']
    scores = [parse(score) for score in scores]
    scores = np.array(scores)
    scores_list.append(scores)
    if not m:
      m = dict(zip(ids, labels))
      candidates = dict(zip(ids, candidates_))
    df0 = df.loc[df['type'] == 0]
    df1 = df.loc[df['type'] == 1]
    labels0 = df0['label'].values
    predicts0 = df0['predict'].values
    labels1 = df1['label'].values
    predicts1 = df1['predict'].values
    if not wether_ids:
      wether_ids = set(df1['id'].values)
    acc = np.mean(np.equal(labels, predicts))
    acc_if = np.mean(np.equal(labels0, predicts0))
    acc_wether = np.mean(np.equal(labels1, predicts1))
    weights.append(acc)
    weights_if.append(acc_if)
    weights_wether.append(acc_wether)
    print(i, file_, 'acc:', acc, 'acc_if:', acc_if, 'acc_wether:', acc_wether,
          'num_if:', len(df0), 'num_wether:', len(df1))

  blend_weights(weights, 1.)
  # weights_if similar to disabled weights
  blend_weights(weights_if, 100.)
  blend_weights(weights_wether, 0.001)  # 75106
  #blend_weights(weights_wether, 0.01)  # 751

  print('weights', weights)
  print('weights_if', weights_if)
  print('weights_wether', weights_wether)

  # NOTE: uses `ids` from the last df read above; assumes all files share the same sorted ids
  for i in tqdm(range(len(valid_files)), ascii=True):
    scores = scores_list[i]
    weight = weights[i]
    weight_if = weights_if[i]
    weight_wether = weights_wether[i]
    for id, score in zip(ids, scores):
      weight_ = get_weight(id, weight, weight_if, weight_wether)
      score *= weight_
      if id not in wether_ids:
        if id not in results:
          results[id] = copy.copy(score)
        else:
          results[id] += score
      if id not in results1:
        results1[id] = copy.copy(score)
      else:
        results1[id] += score
      score = gezi.softmax(score)
      if id not in results2:
        results2[id] = copy.copy(score)
      else:
        results2[id] += score
      if id in wether_ids:
        if id not in results:
          results[id] = copy.copy(score)
        else:
          results[id] += score

  match = 0
  match_if = 0
  match_wether = 0
  for id, score in results1.items():
    index = np.argmax(score, -1)
    #print(id, score, index)
    predict = candidates[id].split('|')[index]
    match_now = 1 if predict == m[id] else 0
    match += match_now
    if id not in wether_ids:
      match_if += match_now
    else:
      match_wether += match_now
  print('--------------by logit')
  print('acc_if_by_logit', match_if / (len(results1) - len(wether_ids)))
  print('acc_wether_by_logit', match_wether / len(wether_ids))
  print('acc_by_logit', match / len(results1))

  match = 0
  match_if = 0
  match_wether = 0
  for id, score in results2.items():
    index = np.argmax(score, -1)
    predict = candidates[id].split('|')[index]
    match_now = 1 if predict == m[id] else 0
    match += match_now
    if id not in wether_ids:
      match_if += match_now
    else:
      match_wether += match_now
  print('---------------by prob')
  print('acc_if_by_prob', match_if / (len(results2) - len(wether_ids)))
  print('acc_wether_by_prob', match_wether / len(wether_ids))
  print('acc_by_prob', match / len(results2))

  match = 0
  match_if = 0
  match_wether = 0
  for id, score in results.items():
    index = np.argmax(score, -1)
    predict = candidates[id].split('|')[index]
    match_now = 1 if predict == m[id] else 0
    match += match_now
    if id not in wether_ids:
      match_if += match_now
    else:
      match_wether += match_now
  print('--------------if by logit, wether by prob')
  print('acc_if_final', match_if / (len(results) - len(wether_ids)))
  print('acc_wether_final', match_wether / len(wether_ids))
  print('acc_final', match / len(results))

  candidates = {}
  results = {}
  for i, file_ in enumerate(infer_files):
    df = pd.read_csv(file_)
    df = df.sort_values('id')
    ids = df['id'].values
    candidates_ = df['candidates'].values
    scores = df['score']
    scores = [parse(score) for score in scores]
    scores = np.array(scores)
    df1 = df.loc[df['type'] == 1]
    ids1 = df1['id'].values
    if not wether_ids:
      wether_ids = set(ids1)
    if not results:
      candidates = dict(zip(ids, candidates_))
    print(i, file_)
    weight = weights[i]
    weight_if = weights_if[i]
    weight_wether = weights_wether[i]
    for id, score in zip(ids, scores):
      weight_ = get_weight(id, weight, weight_if, weight_wether)
      score = score * weight_
      if id not in wether_ids:
        if id not in results:
          results[id] = copy.copy(score)
        else:
          results[id] += score
      else:
        score = gezi.softmax(score)
        if id not in results:
          results[id] = copy.copy(score)
        else:
          results[id] += score

  ofile = os.path.join(idir, 'ensemble.infer.txt')
  print('out:', ofile)
  with open(ofile, 'w') as out:
    for id, score in results.items():
      index = np.argmax(score, -1)
      #print(id, score, index)
      predict = candidates[id].split('|')[index]
      print(id, predict, sep='\t', file=out)

  ofile = os.path.join(idir, 'ensemble.infer.debug.txt')
  print('out debug:', ofile)
  if not DEBUG:
    with open(ofile, 'w') as out:
      for id, score in results.items():
        index = np.argmax(score, -1)
        #print(id, score, index)
        predict = candidates[id].split('|')[index]
        print(id, predict, score, sep='\t', file=out)
  else:
    predicts = np.array([candidates[id].split('|')[np.argmax(score, -1)]
                         for id, score in results.items()])
    gdf['predict'] = predicts
    print('check acc:', np.mean(np.equal(gdf['label'].values, gdf['predict'].values)))
    gdf.to_csv(ofile, index=False)
def main(_):
  print('METHOD:', FLAGS.method)
  print('Norm factor:', FLAGS.norm_factor)

  DEBUG = FLAGS.debug
  idir = FLAGS.idir

  if not DEBUG:
    FLAGS.infer = True
    FLAGS.num_folds = 1
    #FLAGS.num_grids = 10

  # first column is id, second is content ..
  idx = 2

  valid_files = glob.glob(f'{idir}/*.valid.csv')
  valid_files = [x for x in valid_files if not 'ensemble' in x]
  if not DEBUG:
    print('VALID then INFER')
    infer_files = glob.glob(f'{idir}/*.infer.csv.debug')
  else:
    print('Debug mode INFER will write result using valid ids, just for test')
    infer_files = glob.glob(f'{idir}/*.valid.csv')
    infer_files = [x for x in infer_files if not 'ensemble' in x]
  print('num_ensembles', len(valid_files), 'num_infers', len(infer_files))
  assert len(valid_files) == len(infer_files), infer_files

  global num_ensembles
  num_ensembles = len(valid_files)
  # need global ? even if only read?
  global class_weights
  #print('-----------', class_weights)

  print('loading all valid csv')
  dfs = []
  for file_ in tqdm(valid_files, ascii=True):
    df = pd.read_csv(file_)
    df = df.sort_values('id')
    dfs.append(df)

  if FLAGS.num_folds > 1:
    kf = KFold(n_splits=FLAGS.num_folds, shuffle=True, random_state=FLAGS.seed)
    dataset = kf.split(dfs[0])
  else:
    ids = dfs[0]['id'].values
    dataset = [(ids, ids)]

  logits_f1_list = []
  logits_adjusted_f1_list = []
  probs_f1_list = []
  probs_adjusted_f1_list = []
  grids_logits_adjusted_f1_list = []

  logits_predict_list = []
  logits_adjusted_predict_list = []
  probs_predict_list = []
  probs_adjusted_predict_list = []
  grids_logits_adjusted_predict_list = []

  labels_list = []
  results_list = []

  def split_train_valid(x):
    if FLAGS.num_folds == 1:
      return x, x
    else:
      total = 15000
      assert total % FLAGS.num_folds == 0
      num_valid = int(total / FLAGS.num_folds)
      num_train = total - num_valid
      return x[:num_train], x[num_train:]

  for fold, (train_index, valid_index) in enumerate(dataset):
    print('FOLD_%s---------------------------' % fold)
    print('train:', train_index, 'valid:', valid_index)
    class_factors = np.ones([num_attrs, num_classes])
    class_weights = ori_class_weights
    # logits sum results
    results = None
    # prob sum results
    results2 = None
    weights = []
    scores_list = []
    for fid, df in enumerate(dfs):
      file_ = valid_files[fid]
      train = df.iloc[train_index]
      valid = df.iloc[valid_index]
      #if fid == 0:
      train_labels = train.iloc[:, idx:idx + num_attrs].values
      valid_labels = valid.iloc[:, idx:idx + num_attrs].values
      labels = np.concatenate([train_labels, valid_labels], 0)
      train_predicts = train.iloc[:, idx + num_attrs:idx + 2 * num_attrs].values
      valid_predicts = valid.iloc[:, idx + num_attrs:idx + 2 * num_attrs].values
      predicts = np.concatenate([train_predicts, valid_predicts], 0)
      train_scores = train['score']
      valid_scores = valid['score']
      scores = np.concatenate([train_scores, valid_scores], 0)
      scores = [parse(score) for score in scores]
      scores = np.array(scores)
      scores_list.append(scores)
      train_labels, valid_labels = split_train_valid(labels)
      train_predicts, valid_predicts = split_train_valid(predicts)
      train_scores, valid_scores = split_train_valid(scores)
      f1s = calc_f1s(train_labels, train_predicts)
      f1s_adjusted = calc_f1s(train_labels, to_predict(train_scores, is_single=True))
      train_probs = gezi.softmax(train_scores.reshape([-1, NUM_ATTRIBUTES, NUM_CLASSES]))
      aucs = calc_aucs(train_labels + 2, train_probs)
      losses = calc_losses(train_labels + 2, train_probs)
      f1 = np.mean(f1s)
      f1_adjusted = np.mean(f1s_adjusted)
      print('%-3d' % fid, '%-100s' % file_, '%.5f' % f1, '%.5f' % f1_adjusted,
            '%.5f' % np.mean(aucs), '%.5f' % np.mean(losses))
      if FLAGS.weight_by == 'loss':
        weight = np.reshape(1 / losses, [num_attrs, 1])
      elif FLAGS.weight_by == 'auc':
        weight = np.reshape(aucs, [num_attrs, 1])
      else:
        weight = np.reshape(f1s_adjusted, [num_attrs, 1])
      weights.append(weight)

    weights = np.array(weights)
    scores_list = np.array(scores_list)
    weights = blend(weights, FLAGS.norm_factor)
    sum_weights = np.sum(weights, 0)
    # print('weights\n', weights)
    # print('sum_weights\n', sum_weights)
    # if DEBUG:
    #   print(weights)

    print('-----------calc weight and score')
    for fid in tqdm(range(len(valid_files)), ascii=True):
      scores = scores_list[fid]
      if results is None:
        results = np.zeros([len(scores), num_attrs * num_classes])
        results2 = np.zeros([len(scores), num_attrs * num_classes])
      weight = weights[fid]
      #print(fid, valid_files[fid], '\n', ['%.5f' % x for x in np.reshape(weight, [-1])])
      if FLAGS.method == 'avg' or FLAGS.method == 'mean':
        weight = 1.
      for i, score in enumerate(scores):
        score = np.reshape(score, [num_attrs, num_classes]) * weight
        score = np.reshape(score, [-1])
        results[i] += score
        # notice softmax([1,2]) = [0.26894142, 0.73105858], softmax([2,4]) = [0.11920292, 0.88079708]
        score = np.reshape(score, [num_attrs, num_classes])
        # this does not work because already multiplied by weight..
        #score *= FLAGS.logits_factor
        score = gezi.softmax(score, -1)
        #score *= class_weights
        score = np.reshape(score, [-1])
        results2[i] += score

    train_results, valid_results = split_train_valid(results)
    train_results2, valid_results2 = split_train_valid(results2)

    print('-----------using prob ensemble')
    adjusted_predict_prob = to_predict(valid_results2, sum_weights, adjust=False)
    adjusted_f1_prob = calc_f1(valid_labels, adjusted_predict_prob)
    valid_results2 = np.reshape(valid_results2, [-1, num_attrs, num_classes])
    predicts2 = np.argmax(valid_results2, -1) - 2
    f1_prob = calc_f1(valid_labels, predicts2)
    probs_f1_list.append(f1_prob)
    probs_adjusted_f1_list.append(adjusted_f1_prob)
    probs_predict_list.append(predicts2)
    probs_adjusted_predict_list.append(adjusted_predict_prob)
    print('%-40s' % 'f1_prob:', '%.5f' % f1_prob)
    print('%-40s' % 'adjusted f1_prob:', '%.5f' % adjusted_f1_prob)
    # print('-----------detailed f1 infos (ensemble by prob)')
    # _, adjusted_f1_probs, class_f1s = calc_f1_alls(valid_labels, to_predict(results2[num_train:], sum_weights, adjust=False))
    # for i, attr in enumerate(ATTRIBUTES):
    #   print(attr, adjusted_f1_probs[i])
    # for i, cls in enumerate(CLASSES):
    #   print(cls, class_f1s[i])

    print('-----------using logits ensemble')
    adjusted_predict = to_predict(valid_results, sum_weights)
    adjusted_f1 = calc_f1(valid_labels, adjusted_predict)
    valid_results = np.reshape(valid_results, [-1, num_attrs, num_classes])
    predicts = np.argmax(valid_results, -1) - 2
    f1 = calc_f1(valid_labels, predicts)
    logits_f1_list.append(f1)
    logits_adjusted_f1_list.append(adjusted_f1)
    logits_predict_list.append(predicts)
    logits_adjusted_predict_list.append(adjusted_predict)
    results_list.append(valid_results)
    labels_list.append(valid_labels)
    print('%-40s' % 'f1:', '%.5f' % f1)
    print('%-40s' % 'adjusted f1:', '%.5f' % adjusted_f1)
    if FLAGS.show_detail:
      print('-----------detailed f1 infos (ensemble by logits)')
      _, adjusted_f1s, class_f1s = calc_f1_alls(valid_labels, to_predict(valid_results, sum_weights))
      for i, attr in enumerate(ATTRIBUTES):
        print('%-40s' % attr, '%.5f' % adjusted_f1s[i])
      for i, cls in enumerate(CLASSES):
        print('%-40s' % cls, '%.5f' % class_f1s[i])
      print('%-40s' % 'f1:', '%.5f' % f1)
      print('%-40s' % 'f1 prob:', '%.5f' % f1_prob)
      print('%-40s' % 'adjusted f1 prob:', '%.5f' % adjusted_f1_prob)
      print('%-40s' % 'adjusted f1:', '%.5f' % adjusted_f1)

    if FLAGS.num_grids:
      print('------------grid search num_grids', FLAGS.num_grids)
      class_factors = grid_search_class_factors(
          gezi.softmax(np.reshape(train_results, [-1, num_attrs, num_classes]) * (FLAGS.logits_factor / sum_weights)),
          train_labels, class_weights, num_grids=FLAGS.num_grids)
      if FLAGS.show_detail:
        print('class_factors1 with num_grids', FLAGS.num_grids)
        print(class_factors)
      # adjust class weights to get a better result from grid search
      class_weights = class_weights * class_factors
      adjusted_f1_before_grids = adjusted_f1
      print('after dynamic adjust class factors')
      adjusted_predict = to_predict(valid_results, sum_weights)
      adjusted_f1 = calc_f1(valid_labels, adjusted_predict)
      valid_results = np.reshape(valid_results, [-1, num_attrs, num_classes])
      grids_logits_adjusted_f1_list.append(adjusted_f1)
      grids_logits_adjusted_predict_list.append(adjusted_predict)
      print('-----------using logits ensemble')
      print('%-40s' % 'adjusted f1 before grids:', '%.5f' % adjusted_f1_before_grids)
      print('%-40s' % 'adjusted f1:', '%.5f' % adjusted_f1)
      if FLAGS.show_detail:
        print('-----------detailed f1 infos (ensemble by logits)')
        _, adjusted_f1s, class_f1s = calc_f1_alls(valid_labels, to_predict(valid_results, sum_weights))
        for i, attr in enumerate(ATTRIBUTES):
          print('%-40s' % attr, '%.5f' % adjusted_f1s[i])
        for i, cls in enumerate(CLASSES):
          print('%-40s' % cls, '%.5f' % class_f1s[i])
        print('%-40s' % 'adjusted f1 before grids:', '%.5f' % adjusted_f1_before_grids)
        print('%-40s' % 'adjusted f1:', '%.5f' % adjusted_f1)

  # print('-------------------------------------OVERALL mean')
  # print('ensemble by probs')
  # print('%-40s' % 'f1', '%.5f' % np.mean(probs_f1_list))
  # print('%-40s' % 'adjusted f1', '%.5f' % np.mean(probs_adjusted_f1_list))
  # print('ensemble by logits')
  # print('%-40s' % 'f1:', '%.5f' % np.mean(logits_f1_list))
  # print('%-40s' % 'adjusted f1:', '%.5f' % np.mean(logits_adjusted_f1_list))
  # if FLAGS.num_grids:
  #   print('ensemble by logits after grid search')
  #   print('%-40s' % 'adjusted f1', '%.5f' % np.mean(grids_logits_adjusted_f1_list))

  print('-------------------------------------OVERALL recalc')
  labels = np.concatenate(labels_list, 0)
  print('ensemble by probs')
  print('%-40s' % 'f1', '%.5f' % calc_f1(labels, np.concatenate(probs_predict_list, 0)))
  print('%-40s' % 'adjusted f1', '%.5f' % calc_f1(labels, np.concatenate(probs_adjusted_predict_list, 0)))
  print('ensemble by logits')
  predicts = np.concatenate(logits_predict_list, 0)
  print('%-40s' % 'f1:', '%.5f' % calc_f1(labels, predicts))
  adjusted_predicts = np.concatenate(logits_adjusted_predict_list, 0)
  print('%-40s' % 'adjusted f1:', '%.5f' % calc_f1(labels, adjusted_predicts))
  if FLAGS.num_grids:
    print('ensemble by logits after grid search')
    grids_predicts = np.concatenate(grids_logits_adjusted_predict_list, 0)
    print('%-40s' % 'adjusted f1 after grid search', '%.5f' % calc_f1(labels, grids_predicts))

  _, adjusted_f1s, class_f1s = calc_f1_alls(labels, adjusted_predicts)
  for i, attr in enumerate(ATTRIBUTES):
    print('%-40s' % attr, '%.5f' % adjusted_f1s[i])
  for i, cls in enumerate(CLASSES):
    print('%-40s' % cls, '%.5f' % class_f1s[i])
  print('%-40s' % 'f1', '%.5f' % calc_f1(labels, predicts))
  print('%-40s' % 'adjusted f1', '%.5f' % calc_f1(labels, adjusted_predicts))
  if FLAGS.num_grids:
    print('%-40s' % 'adjusted f1 after grid search', '%.5f' % calc_f1(labels, grids_predicts))

  results = np.concatenate(results_list, 0)
  results = results.reshape([-1, NUM_ATTRIBUTES, NUM_CLASSES])
  #factor = FLAGS.logits_factor / sum_weights
  #print('%-40s' % '* factor loss', '%.5f' % calc_loss(labels, gezi.softmax(results * factor)))
  ## directly do softmax on results since sum weights is 1
  loss = calc_loss(labels, gezi.softmax(results))
  print('%-40s' % 'loss', '%.5f' % loss)

  print('f1:class predictions distribution')
  counts = get_distribution(predicts)
  for attr, count in zip(ATTRIBUTES, counts):
    print('%-40s' % attr, ['%.5f' % (x / len(predicts)) for x in count])
  #print_confusion_matrix(labels, predicts)

  print('adjusted f1:class predictions distribution')
  counts = get_distribution(adjusted_predicts)
  for attr, count in zip(ATTRIBUTES, counts):
    print('%-40s' % attr, ['%.5f' % (x / len(predicts)) for x in count])
  #print_confusion_matrix(labels, adjusted_predicts)

  if FLAGS.num_grids:
    print('adjusted f1:class predictions distribution after grids search')
    counts = get_distribution(grids_predicts)
    for attr, count in zip(ATTRIBUTES, counts):
      print('%-40s' % attr, ['%.5f' % (x / len(grids_predicts)) for x in count])
    #print_confusion_matrix(labels, grids_predicts)

  DEBUG = FLAGS.debug
  if FLAGS.infer:
    print('------------infer')
    ofile = os.path.join(idir, 'ensemble.infer.csv')
    file_ = gezi.strip_suffix(file_, '.debug')
    df = pd.read_csv(file_)
    idx = 2
    results = None
    results2 = None
    for fid, file_ in enumerate(infer_files):
      df = pd.read_csv(file_)
      df = df.sort_values('id')
      print(fid, file_, len(df))
      if not FLAGS.debug:
        assert len(df) == 200000
      if results is None:
        results = np.zeros([len(df), num_attrs * num_classes])
        results2 = np.zeros([len(df), num_attrs * num_classes])
      scores = df['score']
      scores = [parse(score) for score in scores]
      scores = np.array(scores)
      weight = weights[fid]
      # fixed: original had `and`, which can never be true
      if FLAGS.method == 'avg' or FLAGS.method == 'mean':
        weight = 1.
      for i, score in enumerate(scores):
        score = np.reshape(np.reshape(score, [num_attrs, num_classes]) * weight, [-1])
        results[i] += score
        score = gezi.softmax(np.reshape(score, [num_attrs, num_classes]), -1)
        score = np.reshape(score, [-1])
        results2[i] += score

    #predicts = to_predict(results2, sum_weights)
    predicts = to_predict(results, sum_weights)
    counts = get_distribution(predicts)
    for attr, count in zip(ATTRIBUTES, counts):
      print('%-40s' % attr, ['%.5f' % (x / len(predicts)) for x in count])

    if not DEBUG:
      columns = df.columns[idx:idx + num_attrs].values
    else:
      columns = df.columns[idx + num_attrs:idx + 2 * num_attrs].values
    if not DEBUG:
      ofile = os.path.join(idir, 'ensemble.infer.csv')
    else:
      ofile = os.path.join(idir, 'ensemble.valid.csv')
    if not DEBUG:
      file_ = gezi.strip_suffix(file_, '.debug')
      print('temp csv used for write', file_)
      df = pd.read_csv(file_)
    else:
      print('debug test using file', valid_files[-1])
      df = pd.read_csv(valid_files[-1])
    # to be safe, must sort by id
    df = df.sort_values('id')

    # TODO better ? not using loop ?
    for i, column in enumerate(columns):
      df[column] = predicts[:, i]
    if DEBUG:
      print('check blend result', calc_f1(df.iloc[:, idx:idx + num_attrs].values, predicts))
    print(f'adjusted f1_prob:[{adjusted_f1_prob}]')
    print(f'adjusted f1:[{adjusted_f1}]')
    print(f'loss:[{loss}]')
    print('out:', ofile)
    if not DEBUG:
      df.to_csv(ofile, index=False, encoding="utf_8_sig")

    print('---------------results', results.shape)
    df['score'] = [x for x in results]
    if not DEBUG:
      ofile = os.path.join(idir, 'ensemble.infer.debug.csv')
    else:
      ofile = os.path.join(idir, 'ensemble.valid.csv')
    print('out debug:', ofile)
    df.to_csv(ofile, index=False, encoding="utf_8_sig")
def main(_):
  print('METHOD:', FLAGS.method)
  print('Norm factor:', FLAGS.norm_factor)

  # if FLAGS.grid_search:
  #   FLAGS.debug = False
  DEBUG = FLAGS.debug
  idir = FLAGS.idir

  # first column is id, second is content ..
  idx = 2

  # logits sum results
  results = None
  # prob sum results
  results2 = None

  valid_files = glob.glob(f'{idir}/*.valid.csv')
  valid_files = [x for x in valid_files if not 'ensemble' in x]
  if not DEBUG:
    print('VALID then INFER')
    infer_files = glob.glob(f'{idir}/*.infer.csv.debug')
  else:
    print('Debug mode INFER will write result using valid ids, just for test')
    infer_files = glob.glob(f'{idir}/*.valid.csv')
    infer_files = [x for x in infer_files if not 'ensemble' in x]
  print('num_ensembles', len(valid_files))
  print('num_infers', len(infer_files))
  assert len(valid_files) == len(infer_files), infer_files

  global num_ensembles
  num_ensembles = len(valid_files)
  # need global ? even if only read?
  global class_weights
  #print('-----------', class_weights)

  def sort_table(df, randomize, key='id'):
    df_list = []
    for i in randomize:
      df_list.append(df[df[key] == i])
    return pd.concat(df_list)

  randomize = None

  # weights holds one per-attribute weight vector per model
  weights = []
  scores_list = []
  valid_files_ = []
  for fid, file_ in enumerate(valid_files):
    df = pd.read_csv(file_)
    # if fid != len(valid_files) - 1:
    #   df = df.drop(['content'])
    #df = df.sort_values('id')
    if randomize is None:
      np.random.seed(FLAGS.seed)
      randomize = np.arange(len(df))
      np.random.shuffle(randomize)
    df['id2'] = randomize
    #df = sort_table(df, randomize)
    df = df.sort_values('id2')
    labels = df.iloc[:, idx:idx + num_attrs].values
    predicts = df.iloc[:, idx + num_attrs:idx + 2 * num_attrs].values
    scores = df['score']
    scores = [parse(score) for score in scores]
    scores = np.array(scores)
    scores_list.append(scores)
    #f1 = calc_f1(labels, predicts)
    #f1 = calc_f1(labels, to_predict(scores))
    #f1s = calc_f1s(labels, predicts)
    ## to_predict better
    # f1_file = gezi.strip_suffix(file_, '.valid.csv') + '.f1s.npy'
    # f1_adjusted_file = gezi.strip_suffix(file_, '.valid.csv') + '.f1s.adjust.npy'
    # if not os.path.exists(f1_file):
    valid_labels = labels[num_train:]
    labels = labels[:num_train]
    predicts = predicts[:num_train]
    scores = scores[:num_train]
    f1s = calc_f1s(labels, predicts)
    f1s_adjusted = calc_f1s(labels, to_predict(scores, is_single=True))
    probs = gezi.softmax(scores.reshape([-1, NUM_ATTRIBUTES, NUM_CLASSES]))
    aucs = calc_aucs(labels + 2, probs)
    losses = calc_losses(labels + 2, probs)
    # np.save(f1_file, f1s)
    # np.save(f1_adjusted_file, f1s_adjusted)
    # else:
    #   f1s = np.load(f1_file)
    #   f1s_adjusted = np.load(f1_adjusted_file)
    f1 = np.mean(f1s)
    f1_adjusted = np.mean(f1s_adjusted)
    print(fid, file_, f1, f1_adjusted, np.mean(aucs), np.mean(losses))
    if f1_adjusted < FLAGS.thre:
      print('ignore', file_)
      continue
    else:
      valid_files_.append(file_)
    # NOTICE weighted can get 7186 while avg only 716
    # and using original f1s score higher
    #weight = np.reshape(f1s, [num_attrs, 1])
    #weight = np.reshape(f1s_adjusted, [num_attrs, 1])
    #weight = np.reshape(aucs, [num_attrs, 1])
    if FLAGS.weight_by == 'loss':
      weight = np.reshape(1 / losses, [num_attrs, 1])
    elif FLAGS.weight_by == 'auc':
      weight = np.reshape(aucs, [num_attrs, 1])
    else:
      weight = np.reshape(f1s_adjusted, [num_attrs, 1])
    weights.append(weight)

  weights = np.array(weights)
  scores_list = np.array(scores_list)
  blend_weights(weights, FLAGS.norm_factor)
  # if DEBUG:
  #   print(weights)

  valid_files = valid_files_
  print('final num valid files', len(valid_files))

  for fid in tqdm(range(len(valid_files)), ascii=True):
    scores = scores_list[fid]
    if results is None:
      results = np.zeros([len(scores), num_attrs * num_classes])
      results2 = np.zeros([len(scores), num_attrs * num_classes])
    weight = weights[fid]
    if FLAGS.method == 'avg' or FLAGS.method == 'mean':
      weight = 1.
    for i, score in enumerate(scores):
      score = np.reshape(score, [num_attrs, num_classes]) * weight
      score = np.reshape(score, [-1])
      results[i] += score
      # notice softmax([1,2]) = [0.26894142, 0.73105858], softmax([2,4]) = [0.11920292, 0.88079708]
      score = np.reshape(score, [num_attrs, num_classes])
      # this does not work because already multiplied by weight..
      #score *= FLAGS.logits_factor
      score = gezi.softmax(score, -1)
      #score *= class_weights
      score = np.reshape(score, [-1])
      results2[i] += score

  sum_weights = np.sum(weights, 0)
  adjusted_f1 = calc_f1(valid_labels, to_predict(results[num_train:], sum_weights))
  results = np.reshape(results, [-1, num_attrs, num_classes])
  predicts = np.argmax(results, -1) - 2
  f1 = calc_f1(valid_labels, predicts[num_train:])
  print('-----------using logits ensemble')
  print('f1:', f1)
  print('adjusted f1:', adjusted_f1)

  adjusted_f1_prob = calc_f1(valid_labels, to_predict(results2[num_train:], sum_weights, adjust=False))
  results2 = np.reshape(results2, [-1, num_attrs, num_classes])
  predicts2 = np.argmax(results2, -1) - 2
  f1_prob = calc_f1(valid_labels, predicts2[num_train:])
  print('-----------using prob ensemble')
  print('f1_prob:', f1_prob)
  print('adjusted f1_prob:', adjusted_f1_prob)

  print('-----------detailed f1 infos (ensemble by prob)')
  _, adjusted_f1_probs, class_f1s = calc_f1_alls(valid_labels, to_predict(results2[num_train:], sum_weights, adjust=False))
  for i, attr in enumerate(ATTRIBUTES):
    print(attr, adjusted_f1_probs[i])
  for i, cls in enumerate(CLASSES):
    print(cls, class_f1s[i])

  print('-----------detailed f1 infos (ensemble by logits)')
  _, adjusted_f1s, class_f1s = calc_f1_alls(valid_labels, to_predict(results[num_train:], sum_weights))
  for i, attr in enumerate(ATTRIBUTES):
    print(attr, adjusted_f1s[i])
  for i, cls in enumerate(CLASSES):
    print(cls, class_f1s[i])

  print(f'adjusted f1_prob:[{adjusted_f1_prob}]')
  print(f'adjusted f1:[{adjusted_f1}]')

  class_factors = np.ones([num_attrs, num_classes])
  if FLAGS.grid_search:
    class_factors = grid_search_class_factors(
        gezi.softmax(np.reshape(results[:num_train], [-1, num_attrs, num_classes]) * (FLAGS.logits_factor / sum_weights)),
        labels, class_weights, num_grids=FLAGS.num_grids)
    print('class_factors1')
    print(class_factors)
    # adjust class weights to get a better result from grid search
    class_weights = class_weights * class_factors
    print('after dynamic adjust class factors')
    adjusted_f1 = calc_f1(valid_labels, to_predict(results[num_train:], sum_weights))
    results = np.reshape(results, [-1, num_attrs, num_classes])
    predicts = np.argmax(results, -1) - 2
    f1 = calc_f1(valid_labels, predicts[num_train:])
    print('-----------using logits ensemble')
    print('f1:', f1)
    print('adjusted f1:', adjusted_f1)
    print('-----------detailed f1 infos (ensemble by logits)')
    _, adjusted_f1s, class_f1s = calc_f1_alls(valid_labels, to_predict(results[num_train:], sum_weights))
    for i, attr in enumerate(ATTRIBUTES):
      print(attr, adjusted_f1s[i])
    for i, cls in enumerate(CLASSES):
      print(cls, class_f1s[i])
    print(f'adjusted f1_prob:[{adjusted_f1_prob}]')
    print(f'adjusted f1:[{adjusted_f1}]')
df = df.sort_values('id')

idx = ATTRIBUTES.index(attr)
idx = idx * num_classes

num_oks = 0
num_errs = 0
for _, row in df.iterrows():
  label = row[attr + '_y'] + 2
  label = classes[label]
  predict = row[attr] + 2
  predict = classes[predict]
  score = row[FLAGS.key]
  score = parse(score)
  score = score[idx:idx + 4]
  prob = gezi.softmax(score)
  id = row['id']
  if FLAGS.id:
    if id != FLAGS.id:
      continue
    else:
      print(id, score)
      print(id, prob)
      print(attr, 'label :', label)
      print(attr, 'predict:', predict)
      labels = row[2:2 + num_attrs]
      labels += 2
      predicts = row[2 + num_attrs:2 + 2 * num_attrs]
      predicts += 2
      print(list(zip(ATTRIBUTES, [classes[x] for x in labels])))
      print(list(zip(ATTRIBUTES, [classes[x] for x in predicts])))
from scipy.stats import ks_2samp  # needed by calc_correlation below

df = df[df['model'] != 'ensemble']
models_ = df['model'].values
files_ = df['file'].values
metrics = df['adjusted_f1/mean'].values

models = []
files = []
for file, model in tqdm(zip(files_, models_), ascii=True):
  if not os.path.exists(file):
    continue
  df = pd.read_csv(file)
  df = df.sort_values('id')
  scores = [gezi.str2scores(x) for x in df['score'].values]
  scores = np.reshape(scores, [-1, len(ATTRIBUTES), 4])
  scores = gezi.softmax(scores)
  ndf = pd.DataFrame()
  ndf['score'] = np.reshape(scores, [-1])
  dfs.append(ndf)
  files.append(file)
  models.append(model)

def calc_correlation(x, y, method):
  if method.startswith('ks'):
    ks_stat, p_value = ks_2samp(x, y)
    if method == 'ks_s':
      score = ks_stat
    else:
      score = p_value
  else: