Example #1
import glob
import os

import numpy as np
import pandas as pd
from tqdm import tqdm

import gezi  # project-local utilities (softmax, strip_suffix, ...)
# FLAGS and helpers such as parse, to_predict, blend_weights, calc_f1s,
# calc_aucs, calc_losses and grid_search_class_factors, plus class_weights,
# num_attrs, num_classes and the ATTRIBUTES/CLASSES constants, are defined
# elsewhere in the project.


def main(_):
    print('METHOD:', FLAGS.method)
    print('Norm factor:', FLAGS.norm_factor)

    # if FLAGS.num_grids:
    #   FLAGS.debug = False

    DEBUG = FLAGS.debug
    idir = FLAGS.idir

    # first column is id, second is content, so the label columns start at 2
    idx = 2

    # logits sum results
    results = None
    # prob sum results
    results2 = None

    valid_files = glob.glob(f'{idir}/*.valid.csv')
    valid_files = [x for x in valid_files if 'ensemble' not in x]

    if not DEBUG:
        print('VALID then INFER')
        infer_files = glob.glob(f'{idir}/*.infer.csv.debug')
    else:
        print(
            'Debug mode: INFER will write results using valid ids, just for testing')
        infer_files = glob.glob(f'{idir}/*.valid.csv')
        infer_files = [x for x in infer_files if 'ensemble' not in x]

    print('num_ensembles', len(valid_files))
    print('num_infers', len(infer_files))

    if not FLAGS.debug:
        assert len(valid_files) == len(infer_files), infer_files

    global num_ensembles
    num_ensembles = len(valid_files)

    # global is needed here: class_weights is reassigned below
    global class_weights
    #print('-----------', class_weights)

    # weights holds one weight vector per model
    weights = []
    scores_list = []
    valid_files_ = []
    for fid, file_ in enumerate(valid_files):
        df = pd.read_csv(file_)
        df = df.sort_values('id')
        labels = df.iloc[:, idx:idx + num_attrs].values
        predicts = df.iloc[:, idx + num_attrs:idx + 2 * num_attrs].values
        scores = df['score']
        scores = [parse(score) for score in scores]
        scores = np.array(scores)
        scores_list.append(scores)
        #f1 = calc_f1(labels, predicts)
        #f1 = calc_f1(labels, to_predict(scores))
        #f1s = calc_f1s(labels, predicts)
        ## to_predict better
        # f1_file = gezi.strip_suffix(file_, '.valid.csv') + '.f1s.npy'
        # f1_adjusted_file = gezi.strip_suffix(file_, '.valid.csv') + '.f1s.adjust.npy'
        # if not os.path.exists(f1_file):
        f1s = calc_f1s(labels, predicts)
        f1s_adjusted = calc_f1s(labels, to_predict(scores, is_single=True))

        probs = gezi.softmax(scores.reshape([-1, NUM_ATTRIBUTES, NUM_CLASSES]))
        aucs = calc_aucs(labels + 2, probs)
        losses = calc_losses(labels + 2, probs)
        # np.save(f1_file, f1s)
        # np.save(f1_adjusted_file, f1s_adjusted)
        # else:
        #   f1s = np.load(f1_file)
        #   f1s_adjusted = np.load(f1_adjusted_file)
        f1 = np.mean(f1s)
        f1_adjusted = np.mean(f1s_adjusted)

        print(fid, file_, f1, f1_adjusted, np.mean(aucs), np.mean(losses))
        if f1_adjusted < FLAGS.thre:
            print('ignore', file_)
            continue
        else:
            valid_files_.append(file_)

        # NOTICE: weighted blending can score 7186 while plain averaging only 716,
        # and using the original f1s as weights scores higher
        #weight = np.reshape(f1s, [num_attrs, 1])

        #weight = np.reshape(f1s_adjusted, [num_attrs, 1])

        #weight = np.reshape(aucs, [num_attrs, 1])
        if FLAGS.weight_by == 'loss':
            weight = np.reshape(1 / losses, [num_attrs, 1])
        elif FLAGS.weight_by == 'auc':
            weight = np.reshape(aucs, [num_attrs, 1])
        else:
            weight = np.reshape(f1s_adjusted, [num_attrs, 1])

        weights.append(weight)

    weights = np.array(weights)
    scores_list = np.array(scores_list)

    blend_weights(weights, FLAGS.norm_factor)

    # if DEBUG:
    #   print(weights)

    valid_files = valid_files_
    print('final num valid files', len(valid_files))

    for fid in tqdm(range(len(valid_files)), ascii=True):
        scores = scores_list[fid]
        if results is None:
            results = np.zeros([len(scores), num_attrs * num_classes])
            results2 = np.zeros([len(scores), num_attrs * num_classes])
        weight = weights[fid]
        if FLAGS.method == 'avg' or FLAGS.method == 'mean':
            weight = 1.
        for i, score in enumerate(scores):
            score = np.reshape(score, [num_attrs, num_classes]) * weight
            score = np.reshape(score, [-1])

            results[i] += score

            # notice softmax([1,2]) = [0.26894142, 0.73105858] softmax([2,4]) = [0.11920292, 0.88079708]
            score = np.reshape(score, [num_attrs, num_classes])

            # this does not work because the scores were already multiplied by weight above
            #score *= FLAGS.logits_factor

            score = gezi.softmax(score, -1)

            #score *= class_weights

            score = np.reshape(score, [-1])

            results2[i] += score

    sum_weights = np.sum(weights, 0)

    adjusted_f1 = calc_f1(labels, to_predict(results, sum_weights))
    results = np.reshape(results, [-1, num_attrs, num_classes])
    predicts = np.argmax(results, -1) - 2
    f1 = calc_f1(labels, predicts)

    print('-----------using logits ensemble')
    print('f1:', f1)
    print('adjusted f1:', adjusted_f1)

    adjusted_f1_prob = calc_f1(labels,
                               to_predict(results2, sum_weights, adjust=False))
    results2 = np.reshape(results2, [-1, num_attrs, num_classes])
    predicts2 = np.argmax(results2, -1) - 2
    f1_prob = calc_f1(labels, predicts2)

    print('-----------using prob ensemble')
    print('f1_prob:', f1_prob)
    print('adjusted f1_prob:', adjusted_f1_prob)

    print('-----------detailed f1 infos (ensemble by prob)')
    _, adjusted_f1_probs, class_f1s = calc_f1_alls(
        labels, to_predict(results2, sum_weights, adjust=False))

    for i, attr in enumerate(ATTRIBUTES):
        print(attr, adjusted_f1_probs[i])
    for i, cls in enumerate(CLASSES):
        print(cls, class_f1s[i])

    print('-----------detailed f1 infos (ensemble by logits)')
    _, adjusted_f1s, class_f1s = calc_f1_alls(labels,
                                              to_predict(results, sum_weights))

    for i, attr in enumerate(ATTRIBUTES):
        print(attr, adjusted_f1s[i])
    for i, cls in enumerate(CLASSES):
        print(cls, class_f1s[i])

    print(f'adjusted f1_prob:[{adjusted_f1_prob}]')
    print(f'adjusted f1:[{adjusted_f1}]')

    loss = calc_loss(
        labels, gezi.softmax(results.reshape([-1, NUM_ATTRIBUTES,
                                              NUM_CLASSES])))
    print(f'loss:[{loss}]')

    class_factors = np.ones([num_attrs, num_classes])
    if FLAGS.num_grids:
        class_factors = grid_search_class_factors(gezi.softmax(
            np.reshape(results, [-1, num_attrs, num_classes]) *
            (FLAGS.logits_factor / sum_weights)),
                                                  labels,
                                                  class_weights,
                                                  num_grids=FLAGS.num_grids)

    print('class_factors')
    print(class_factors)

    # adjust class weights to get better result from grid search
    class_weights = class_weights * class_factors

    print('after dynamic adjust class factors')
    adjusted_f1 = calc_f1(labels, to_predict(results, sum_weights))
    results = np.reshape(results, [-1, num_attrs, num_classes])
    #predicts = np.argmax(results, -1) - 2
    #f1 = calc_f1(labels, predicts)

    print('-----------using logits ensemble')
    #print('f1:', f1)
    print('adjusted f1:', adjusted_f1)

    print('-----------detailed f1 infos (ensemble by logits)')
    _, adjusted_f1s, class_f1s = calc_f1_alls(labels,
                                              to_predict(results, sum_weights))

    for i, attr in enumerate(ATTRIBUTES):
        print(attr, adjusted_f1s[i])
    for i, cls in enumerate(CLASSES):
        print(cls, class_f1s[i])

    # print(f'adjusted f1_prob:[{adjusted_f1_prob}]')
    # print(f'adjusted f1:[{adjusted_f1}]')

    #-------------infer
    print('------------infer')
    ofile = os.path.join(idir, 'ensemble.infer.csv')
    file_ = gezi.strip_suffix(file_, '.debug')
    df = pd.read_csv(file_)

    idx = 2
    results = None
    results2 = None
    for fid, file_ in enumerate(infer_files):
        df = pd.read_csv(file_)
        df = df.sort_values('id')
        print(fid, file_, len(df))
        if not FLAGS.debug:
            assert len(df) == 200000
        if results is None:
            results = np.zeros([len(df), num_attrs * num_classes])
            results2 = np.zeros([len(df), num_attrs * num_classes])
        scores = df['score']
        scores = [parse(score) for score in scores]
        scores = np.array(scores)
        weight = weights[fid]
        if FLAGS.method == 'avg' or FLAGS.method == 'mean':
            weight = 1.
        for i, score in enumerate(scores):
            score = np.reshape(
                np.reshape(score, [num_attrs, num_classes]) * weight, [-1])
            results[i] += score
            score = gezi.softmax(np.reshape(score, [num_attrs, num_classes]),
                                 -1)
            score = np.reshape(score, [-1])
            results2[i] += score

    #predicts = to_predict(results2, sum_weights)
    predicts = to_predict(results, sum_weights)

    if not DEBUG:
        columns = df.columns[idx:idx + num_attrs].values
    else:
        columns = df.columns[idx + num_attrs:idx + 2 * num_attrs].values

    if not DEBUG:
        ofile = os.path.join(idir, 'ensemble.infer.csv')
    else:
        ofile = os.path.join(idir, 'ensemble.valid.csv')

    if not DEBUG:
        file_ = gezi.strip_suffix(file_, '.debug')
        print('temp csv using for write', file_)
        df = pd.read_csv(file_)
    else:
        print('debug test using file', valid_files[-1])
        df = pd.read_csv(valid_files[-1])

    # for safety, always sort by id
    df = df.sort_values('id')

    # TODO: can this be vectorized instead of looping over columns?
    for i, column in enumerate(columns):
        df[column] = predicts[:, i]

    if DEBUG:
        print('check blend result',
              calc_f1(df.iloc[:, idx:idx + num_attrs].values, predicts))
    print(f'adjusted f1_prob:[{adjusted_f1_prob}]')
    print(f'adjusted f1:[{adjusted_f1}]')
    print(f'loss:[{loss}]')

    print('out:', ofile)
    if not DEBUG:
        df.to_csv(ofile, index=False, encoding="utf_8_sig")

    print('---------------results', results.shape)
    df['score'] = [x for x in results]
    factor = FLAGS.logits_factor / sum_weights
    #print('--------sum_weights', sum_weights)
    #print('--------factor', factor)
    logits = np.reshape(results, [-1, num_attrs, num_classes])
    # DO NOT use *= here: it would mutate results in place
    logits = logits * factor
    probs = gezi.softmax(logits, -1)
    probs *= class_weights
    logits = np.reshape(logits, [-1, num_attrs * num_classes])
    print('---------------logits', logits.shape)
    print('----results', results)
    print('----logits', logits)
    #df['logit'] = [x for x in logits]
    probs = np.reshape(probs, [-1, num_attrs * num_classes])
    print('---------------probs', probs.shape)
    #df['prob'] = [x for x in probs]

    if not DEBUG:
        ofile = os.path.join(idir, 'ensemble.infer.debug.csv')
    else:
        ofile = os.path.join(idir, 'ensemble.valid.csv')
    print('out debug:', ofile)
    df.to_csv(ofile, index=False, encoding="utf_8_sig")
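A note on Example #1's two accumulators: results sums weighted logits and softmaxes once at the end, while results2 sums each model's softmax of its weighted logits. Because scaling logits sharpens a softmax (the softmax([1,2]) vs softmax([2,4]) comment in the code), the two blends genuinely differ. A minimal numpy sketch with hypothetical logits and weights, assuming gezi.softmax is a standard numerically stable softmax:

import numpy as np

def softmax(x, axis=-1):
    # numerically stable softmax, standing in for gezi.softmax
    e = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e / np.sum(e, axis=axis, keepdims=True)

print(softmax(np.array([1., 2.])))  # [0.26894142 0.73105858]
print(softmax(np.array([2., 4.])))  # [0.11920292 0.88079708] -> doubled logits, sharper probs

a, b = np.array([1., 2.]), np.array([3., 0.])  # hypothetical logits of two models
w_a, w_b = 0.6, 0.4
logit_blend = softmax(w_a * a + w_b * b)          # results: blend logits, softmax once
prob_blend = softmax(w_a * a) + softmax(w_b * b)  # results2: sum per-model softmaxes
print(logit_blend)                    # [0.64565631 0.35434369]
print(prob_blend / prob_blend.sum())  # ~[0.5614 0.4386] -- a different blend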
Example #2
# model.ckpt-3.00-9846.valid.metrics
# ckpt-4.valid.metrics 
res = []
for dir_ in glob.glob(f'{model_dir}/*/*'):
  if not os.path.isdir(dir_):
    continue
  best_score = 0 if 'loss' not in key else 1e10
  best_epoch = None

  files = glob.glob(f'{dir_}/epoch/*.valid.metrics')
  if not files:
    files = glob.glob(f'{dir_}/ckpt/*.valid.metrics')

  find = False
  for file_ in files: 
    epoch = int(float(gezi.strip_suffix(file_, 'valid.metrics').split('-')[1]))
    for line in open(file_):
      name, score = line.strip().split()
      score = float(score)
      if name != key:
        continue 
      if cmp(score, best_score):
        find = True
        best_score = score
        best_epoch = epoch
  if find:
    #print(dir_)
    #print('best_epoch:', best_epoch, 'best_score:', best_score) 
    res.append((dir_.replace('../', ''), best_epoch, best_score))

res.sort(key=lambda x: x[-1], reverse='loss' not in key)
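Example #2 assumes each *.valid.metrics file holds one whitespace-separated "name score" pair per line, with the epoch encoded in the file name (the two commented samples above it). A self-contained sketch of that selection logic; the key is the one Example #6 defaults to, and the scores it would read are hypothetical:

import os

def best_epoch_for_key(metric_files, key='adjusted_f1/mean'):
    # keep the best-scoring epoch for key; keys containing 'loss'
    # are minimized, everything else is maximized
    lower_is_better = 'loss' in key
    best_score = 1e10 if lower_is_better else 0.0
    best_epoch = None
    for path in metric_files:
        # 'model.ckpt-3.00-9846.valid.metrics' -> 3, 'ckpt-4.valid.metrics' -> 4
        stem = os.path.basename(path)
        if stem.endswith('.valid.metrics'):
            stem = stem[:-len('.valid.metrics')]
        epoch = int(float(stem.split('-')[1]))
        for line in open(path):
            name, score = line.strip().split()
            if name != key:
                continue
            score = float(score)
            better = score < best_score if lower_is_better else score > best_score
            if better:
                best_score, best_epoch = score, epoch
    return best_epoch, best_score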
Example #3
#model_dir = '/home/gezi/data3/v11/lstm-or-sp20w/3/submit/'
#model_dir = '/home/gezi/data3/v11/nbert2/'
#model_dir = '/home/gezi/data3/v11/bert3/'
model_dir = '/home/gezi/data3/v11/submit.1115.2.2.2/slim/'
ensemble_dir = model_dir

valid_files = glob.glob('%s/*.valid.csv' % ensemble_dir)
infer_files = glob.glob('%s/*.infer.csv' % ensemble_dir)

valid_files = [x for x in valid_files if 'ensemble' not in x]
infer_files = [x for x in infer_files if 'ensemble' not in x]

#assert len(valid_files) == len(infer_files)
for i, file_ in enumerate(valid_files):
    file_ = os.path.basename(file_)
    file_ = gezi.strip_suffix(file_, '.valid.csv')
    src, model = file_.split('_', 1)
    cell = 'gru'
    if 'lstm' in model:
        cell = 'lstm'

    model = model.replace('.gru', '').replace('.lstm', '')

    pattern = '_model.ckpt-'
    if pattern in model:
        script = model.split(pattern)[0]
    else:
        pattern = '_ckpt-'
        script = model.split(pattern)[0]

    #command = f'MODE=test INFER=1 SRC={src} CELL={cell} sh ./infer/v11/{script}.sh {model_dir}{file_}'
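The gezi module used throughout these snippets is project-local. A minimal stand-in for its strip_suffix helper (behavior inferred from the call sites: drop the suffix only when present), run against a hypothetical file name shaped like the ones Example #3 parses:

def strip_suffix(s, suffix):
    # drop suffix from the end of s if present, else return s unchanged
    return s[:-len(suffix)] if suffix and s.endswith(suffix) else s

fname = 'bert_fc.lstm_model.ckpt-6.00-29538.valid.csv'  # hypothetical
fname = strip_suffix(fname, '.valid.csv')
src, model = fname.split('_', 1)  # 'bert', 'fc.lstm_model.ckpt-6.00-29538'
cell = 'lstm' if 'lstm' in model else 'gru'
model = model.replace('.gru', '').replace('.lstm', '')
script = model.split('_model.ckpt-')[0]
print(src, cell, script)  # bert lstm fc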
Example #4

if __name__ == '__main__':
    load_class_weights()
    input_dir = FLAGS.i
    os.makedirs('./bak', exist_ok=True)
    if os.path.isdir(input_dir):
        df = pd.DataFrame()
        fnames = []
        mnames = []
        m = {}
        for file in glob.glob('%s/*valid.csv' % input_dir):
            try:
                fname = os.path.basename(file)
                fnames.append(fname)
                fname = gezi.strip_suffix(fname, '.valid.csv')

                if 'ensemble' in file:
                    mname = file
                    suffix = ''
                else:
                    if '_ckpt-' in fname:
                        mname, suffix = fname.split('_ckpt-')
                    else:
                        mname, suffix = fname.split('_model.ckpt-')

                mnames.append(mname)

                vals, names = evaluate_file(file)
                for val, name in zip(vals, names):
                    if name not in m:
Example #5
for dir_ in glob.glob(f'{model_dir}/*/*'):
    if not os.path.isdir(dir_):
        continue
    print(dir_)
    best_score = 0 if key != 'loss' else 1e10
    best_epoch = None
    best_iepoch = None

    in_epoch_dir = True
    files = glob.glob(f'{dir_}/epoch/*.valid.metrics')
    if not files:
        in_epoch_dir = False
        files = glob.glob(f'{dir_}/ckpt/*.valid.metrics')

    for file_ in files:
        epoch = gezi.strip_suffix(file_, 'valid.metrics').split('-')[1]
        iepoch = int(float(epoch))
        for line in open(file_):
            name, score = line.strip().split()
            score = float(score)
            if name != key:
                continue
            if cmp(score, best_score):
                best_score = score
                best_epoch = epoch
                best_iepoch = iepoch
    print('best_epoch:', best_epoch, 'best_score:', best_score)
    if best_epoch and best_score > thre:
        if in_epoch_dir:
            command = f'ensemble-cp.py {dir_}/epoch/model.ckpt-{best_epoch}'
        else:
Example #6
key = 'adjusted_f1/mean'

if len(sys.argv) > 2:
    key = sys.argv[2]

print('key', key)

if 'loss' not in key:
    cmp = lambda x, y: x > y
else:
    cmp = lambda x, y: x < y
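# Worked check of the comparator (hypothetical scores): for key='adjusted_f1/mean',
# cmp(0.72, 0.70) is True, so higher scores win; for a key containing 'loss',
# cmp(0.45, 0.50) is True, so lower losses win.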

# model.ckpt-3.00-9846.valid.metrics
# ckpt-4.valid.metrics
res = []
for file_ in glob.glob(f'{model_dir}/*.valid.metrics'):
    epoch = int(float(gezi.strip_suffix(file_, 'valid.metrics').split('-')[1]))
    for line in open(file_):
        name, score = line.strip().split()
        score = float(score)
        if name != key:
            continue
        res.append((gezi.strip_suffix(file_.replace('./', ''),
                                      '.valid.metrics'), epoch, score))

res.sort(key=lambda x: x[-1], reverse='loss' not in key)

for file_, epoch, score in res:
    print('%.5f' % score, epoch, file_)
Example #7
idx = 2
results = None
for file_ in glob.glob('%s/*.infer.csv.debug' % idir):
    df = pd.read_csv(file_)
    predicts = df.iloc[:, idx:idx + num_attrs].values
    print(file_)
    if results is None:
        results = np.zeros([len(df), num_attrs * 4])
    scores = df['score']
    for i, score in enumerate(scores):
        score = parse(score)
        # score = gezi.softmax(np.reshape(score, [num_attrs, 4]), -1)
        # score = np.reshape(score, [-1])
        results[i] += score

#results = np.reshape(results, [-1, num_attrs, 4])
#predicts = np.argmax(results, -1) - 2
predicts = to_predict(results)

columns = df.columns[idx:idx + num_attrs].values

ofile = os.path.join(idir, 'ensemble.infer.csv')
file_ = gezi.strip_suffix(file_, '.debug')
df = pd.read_csv(file_)

# TODO: can this be vectorized instead of looping over columns?
for i, column in enumerate(columns):
    df[column] = predicts[:, i]

print('out:', ofile)
df.to_csv(ofile, index=False, encoding="utf_8_sig")
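Every snippet here relies on parse to turn the CSV 'score' column back into a float vector. Its exact serialization is not shown in these examples; a plausible stand-in, assuming scores are written as bracketed, whitespace- or comma-separated numbers:

import numpy as np

def parse(score_str):
    # hypothetical format: '[0.5 -1.25 2.0]' or '0.5,-1.25,2.0'
    return np.array([float(x) for x in
                     score_str.strip('[]').replace(',', ' ').split()])

print(parse('[0.5 -1.25 2.0]'))  # [ 0.5  -1.25  2.  ]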
Example #8
import glob
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from tqdm import tqdm

import gezi  # project-local utilities, as in Example #1
# FLAGS, ori_class_weights and the remaining helpers (parse, to_predict,
# blend, calc_*, get_distribution, grid_search_class_factors) are defined
# elsewhere in the project.


def main(_):
  print('METHOD:', FLAGS.method)
  print('Norm factor:', FLAGS.norm_factor) 

  DEBUG = FLAGS.debug 
  idir = FLAGS.idir

  if not DEBUG:
    FLAGS.infer = True
    FLAGS.num_folds = 1
    #FLAGS.num_grids = 10 

  # first column is id, second is content, so the label columns start at 2
  idx = 2

  valid_files = glob.glob(f'{idir}/*.valid.csv')
  valid_files = [x for x in valid_files if 'ensemble' not in x]
  
  if not DEBUG:
    print('VALID then INFER')
    infer_files = glob.glob(f'{idir}/*.infer.csv.debug')
  else:
    print('Debug mode: INFER will write results using valid ids, just for testing')
    infer_files = glob.glob(f'{idir}/*.valid.csv') 
    infer_files = [x for x in infer_files if 'ensemble' not in x]

  print('num_ensembles', len(valid_files), 'num_infers', len(infer_files))    
  assert len(valid_files) == len(infer_files), infer_files

  global num_ensembles
  num_ensembles = len(valid_files)

  # global is needed here: class_weights is reassigned below
  global class_weights
  #print('-----------', class_weights)

  print('loading all valid csv')
  dfs = []
  for file_ in tqdm(valid_files, ascii=True):
    df = pd.read_csv(file_)
    df = df.sort_values('id')
    dfs.append(df)

  if FLAGS.num_folds > 1:
    kf = KFold(n_splits=FLAGS.num_folds, shuffle=True, random_state=FLAGS.seed)
    dataset = kf.split(dfs[0])
  else:
    ids = dfs[0]['id'].values
    dataset = [(ids, ids)]
  
  logits_f1_list = []
  logits_adjusted_f1_list = []
  probs_f1_list = []
  probs_adjusted_f1_list = []
  grids_logits_adjusted_f1_list = []

  logits_predict_list = []
  logits_adjusted_predict_list = []
  probs_predict_list = []
  probs_adjusted_predict_list = []
  grids_logits_adjusted_predict_list = []

  labels_list = []
  results_list = []

  def split_train_valid(x):
    if FLAGS.num_folds == 1:
      return x, x 
    else:
      total = 15000
      assert total % FLAGS.num_folds == 0
      num_valid = int(total / FLAGS.num_folds) 
      num_train = total - num_valid
      return x[:num_train], x[num_train:]
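  # Worked example: with total=15000 and FLAGS.num_folds=5, num_valid=3000 and
  # num_train=12000, so per-model weights are fit on x[:12000] and the blend is
  # scored on x[12000:]; with num_folds=1 the same rows serve as both.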

  for fold, (train_index, valid_index) in enumerate(dataset):
    print('FOLD_%s---------------------------' % fold)
    print('train:', train_index,  'valid:', valid_index)
    class_factors = np.ones([num_attrs, num_classes])
    class_weights = ori_class_weights
    # logits sum results
    results = None
    # prob sum results
    results2 = None

    weights = [] 
    scores_list = []

    for fid, df in enumerate(dfs):
      file_ = valid_files[fid]
      train = df.iloc[train_index]
      valid =  df.iloc[valid_index]
      #if fid == 0:      
      train_labels = train.iloc[:, idx:idx+num_attrs].values
      valid_labels = valid.iloc[:, idx:idx+num_attrs].values
      labels = np.concatenate([train_labels, valid_labels], 0)
      train_predicts = train.iloc[:, idx+num_attrs:idx+2*num_attrs].values
      valid_predicts = valid.iloc[:, idx+num_attrs:idx+2*num_attrs].values
      predicts = np.concatenate([train_predicts, valid_predicts], 0)
      train_scores = train['score']
      valid_scores = valid['score']
      scores = np.concatenate([train_scores, valid_scores], 0)
      scores = [parse(score) for score in scores] 
      scores = np.array(scores)
      scores_list.append(scores)     

      train_labels, valid_labels = split_train_valid(labels)
      train_predicts, valid_predicts = split_train_valid(predicts)
      train_scores, valid_scores = split_train_valid(scores)
  
      f1s = calc_f1s(train_labels, train_predicts)
      f1s_adjusted = calc_f1s(train_labels, to_predict(train_scores, is_single=True))

      train_probs = gezi.softmax(train_scores.reshape([-1, NUM_ATTRIBUTES, NUM_CLASSES]))
      aucs = calc_aucs(train_labels + 2, train_probs)
      losses = calc_losses(train_labels + 2, train_probs)

      f1 = np.mean(f1s)
      f1_adjusted = np.mean(f1s_adjusted)
      
      print('%-3d' % fid, '%-100s' % file_, '%.5f' % f1, '%.5f' % f1_adjusted, '%.5f' % np.mean(aucs), '%.5f' % np.mean(losses)) 
      
      if FLAGS.weight_by == 'loss':
        weight = np.reshape(1 / losses, [num_attrs, 1])
      elif FLAGS.weight_by == 'auc':
        weight = np.reshape(aucs, [num_attrs, 1])
      else:
        weight = np.reshape(f1s_adjusted, [num_attrs, 1])

      weights.append(weight) 

    weights = np.array(weights)
    scores_list = np.array(scores_list)

    weights = blend(weights, FLAGS.norm_factor)
    sum_weights = np.sum(weights, 0)

    # print('weights\n', weights)
    # print('sum_weights\n', sum_weights)

    # if DEBUG:
    #   print(weights)
    print('-----------calc weight and score')
    for fid in tqdm(range(len(valid_files)), ascii=True):
      scores = scores_list[fid]
      if results is None:
        results = np.zeros([len(scores), num_attrs * num_classes])
        results2 = np.zeros([len(scores), num_attrs * num_classes])
      weight = weights[fid]
      #print(fid, valid_files[fid], '\n', ['%.5f' % x for x in np.reshape(weight, [-1])])
      if FLAGS.method == 'avg' or FLAGS.method == 'mean': 
        weight = 1.
      for i, score in enumerate(scores):
        score = np.reshape(score, [num_attrs, num_classes]) * weight
        score = np.reshape(score, [-1])
      
        results[i] += score

        # notice softmax([1,2]) = [0.26894142, 0.73105858] softmax([2,4]) = [0.11920292, 0.88079708]
        score = np.reshape(score, [num_attrs, num_classes])
        
        # this does not work because the scores were already multiplied by weight above
        #score *= FLAGS.logits_factor
        
        score = gezi.softmax(score, -1)
        
        #score *= class_weights

        score = np.reshape(score, [-1])
        
        results2[i] += score 

    train_results, valid_results = split_train_valid(results)
    train_results2, valid_results2 = split_train_valid(results2)

    print('-----------using prob ensemble')
    adjusted_predict_prob = to_predict(valid_results2, sum_weights, adjust=False)
    adjusted_f1_prob = calc_f1(valid_labels, adjusted_predict_prob)
    valid_results2 = np.reshape(valid_results2, [-1, num_attrs, num_classes]) 
    predicts2 = np.argmax(valid_results2, -1) - 2
    f1_prob = calc_f1(valid_labels, predicts2)

    probs_f1_list.append(f1_prob)
    probs_adjusted_f1_list.append(adjusted_f1_prob)
    
    probs_predict_list.append(predicts2)
    probs_adjusted_predict_list.append(adjusted_predict_prob)
    
    print('%-40s' % 'f1_prob:', '%.5f' % f1_prob)
    print('%-40s' % 'adjusted f1_prob:', '%.5f' % adjusted_f1_prob)

    # print('-----------detailed f1 infos (ensemble by prob)')
    # _, adjusted_f1_probs, class_f1s = calc_f1_alls(valid_labels, to_predict(results2[num_train:], sum_weights, adjust=False))

    # for i, attr in enumerate(ATTRIBUTES):
    #   print(attr, adjusted_f1_probs[i])
    # for i, cls in enumerate(CLASSES):
    #   print(cls, class_f1s[i])

    print('-----------using logits ensemble')
    adjusted_predict = to_predict(valid_results, sum_weights)
    adjusted_f1 = calc_f1(valid_labels, adjusted_predict)
    valid_results = np.reshape(valid_results, [-1, num_attrs, num_classes]) 
    predicts = np.argmax(valid_results, -1) - 2
    f1 = calc_f1(valid_labels, predicts)

    logits_f1_list.append(f1)
    logits_adjusted_f1_list.append(adjusted_f1)

    logits_predict_list.append(predicts)
    logits_adjusted_predict_list.append(adjusted_predict)

    results_list.append(valid_results)
    labels_list.append(valid_labels)
    
    print('%-40s' % 'f1:', '%.5f' % f1)
    print('%-40s' % 'adjusted f1:', '%.5f' % adjusted_f1)

    if FLAGS.show_detail:
      print('-----------detailed f1 infos (ensemble by logits)')
      _, adjusted_f1s, class_f1s = calc_f1_alls(valid_labels, to_predict(valid_results, sum_weights))
      for i, attr in enumerate(ATTRIBUTES):
        print('%-40s' % attr, '%.5f' % adjusted_f1s[i])
      for i, cls in enumerate(CLASSES):
        print('%-40s' % cls, '%.5f' % class_f1s[i])

    print('%-40s' % 'f1:', '%.5f' % f1)
    print('%-40s' % 'f1 prob:', '%.5f' % f1_prob)
    print('%-40s' % 'adjusted f1 prob:', '%.5f' % adjusted_f1_prob)
    print('%-40s' % 'adjusted f1:', '%.5f' % adjusted_f1)

    if FLAGS.num_grids:
      print('------------grid search num_grids', FLAGS.num_grids)
      class_factors = grid_search_class_factors(gezi.softmax(np.reshape(train_results, [-1, num_attrs, num_classes]) * (FLAGS.logits_factor / sum_weights)), train_labels, class_weights, num_grids=FLAGS.num_grids)
        
      if FLAGS.show_detail:
        print('class_factors1 with num_grids', FLAGS.num_grids)
        print(class_factors)

      # adjust class weights to get better result from grid search 
      class_weights = class_weights * class_factors

      adjusted_f1_before_grids = adjusted_f1
      print('after dynamic adjust class factors')
      adjusted_predict = to_predict(valid_results, sum_weights)
      adjusted_f1 = calc_f1(valid_labels, adjusted_predict)
      valid_results = np.reshape(valid_results, [-1, num_attrs, num_classes]) 

      grids_logits_adjusted_f1_list.append(adjusted_f1)  
      grids_logits_adjusted_predict_list.append(adjusted_predict)

      print('-----------using logits ensemble')
      print('%-40s' % 'adjusted f1 before grids:', '%.5f' % adjusted_f1_before_grids)
      print('%-40s' % 'adjusted f1:', '%.5f' % adjusted_f1)

      if FLAGS.show_detail:
        print('-----------detailed f1 infos (ensemble by logits)')
        _, adjusted_f1s, class_f1s = calc_f1_alls(valid_labels, to_predict(valid_results, sum_weights))
        for i, attr in enumerate(ATTRIBUTES):
          print('%-40s' % attr, '%.5f' % adjusted_f1s[i])
        for i, cls in enumerate(CLASSES):
          print('%-40s' % cls, '%.5f' % class_f1s[i])
      print('%-40s' % 'adjusted f1 before grids:', '%.5f' % adjusted_f1_before_grids)
      print('%-40s' % 'adjusted f1:', '%.5f' % adjusted_f1)

  # print('-------------------------------------OVERALL mean')
  # print('ensemble by probs')
  # print('%-40s' % 'f1', '%.5f' % np.mean(probs_f1_list))
  # print('%-40s' % 'adjusted f1', '%.5f' % np.mean(probs_adjusted_f1_list))
  
  # print('ensemble by logits')
  # print('%-40s' % 'f1:', '%.5f' % np.mean(logits_f1_list))
  # print('%-40s' % 'adjusted f1:', '%.5f' % np.mean(logits_adjusted_f1_list))

  # if FLAGS.num_grids:
  #   print('ensemble by logits after grid search')
  #   print('%-40s' % 'adjusted f1', '%.5f' % np.mean(grids_logits_adjusted_f1_list))

  print('-------------------------------------OVERALL recalc')
  labels = np.concatenate(labels_list, 0)
  print('ensemble by probs')
  print('%-40s' % 'f1', '%.5f' % calc_f1(labels, np.concatenate(probs_predict_list, 0)))
  print('%-40s' % 'adjusted f1', '%.5f' % calc_f1(labels, np.concatenate(probs_adjusted_predict_list, 0)))

  print('ensemble by logits')
  predicts = np.concatenate(logits_predict_list, 0)
  print('%-40s' % 'f1:', '%.5f' % calc_f1(labels, predicts))
  adjusted_predicts = np.concatenate(logits_adjusted_predict_list, 0)
  print('%-40s' % 'adjusted f1:', '%.5f' % calc_f1(labels, adjusted_predicts))

  if FLAGS.num_grids:
    print('ensemble by logits after grid search')
    grids_predicts = np.concatenate(grids_logits_adjusted_predict_list, 0)
    print('%-40s' % 'adjusted f1 after grid search', '%.5f' % calc_f1(labels, grids_predicts))

  _, adjusted_f1s, class_f1s = calc_f1_alls(labels, adjusted_predicts)
  for i, attr in enumerate(ATTRIBUTES):
    print('%-40s' % attr, '%.5f' % adjusted_f1s[i])
  for i, cls in enumerate(CLASSES):
    print('%-40s' % cls, '%.5f' % class_f1s[i])
  print('%-40s' % 'f1', '%.5f' % calc_f1(labels, predicts))
  print('%-40s' % 'adjusted f1', '%.5f' % calc_f1(labels, adjusted_predicts))
  if FLAGS.num_grids:
    print('%-40s' % 'adjusted f1 after grid search', '%.5f' % calc_f1(labels, grids_predicts))

  results = np.concatenate(results_list, 0)
  results = results.reshape([-1, NUM_ATTRIBUTES, NUM_CLASSES]) 
  #factor =  FLAGS.logits_factor / sum_weights
  #print('%-40s' % '* factor loss', '%.5f' % calc_loss(labels, gezi.softmax(results * factor)))
  ## directly do softmax on results since sum weights is 1
  loss = calc_loss(labels, gezi.softmax(results))
  print('%-40s' % 'loss', '%.5f' % loss)

  print('f1:class predictions distribution')
  counts = get_distribution(predicts)
  for attr, count in zip(ATTRIBUTES, counts):
    print('%-40s' % attr, ['%.5f' % (x / len(predicts)) for x in count])
  #print_confusion_matrix(labels, predicts)

  print('adjusted f1:class predictions distribution')
  counts = get_distribution(adjusted_predicts)
  for attr, count in zip(ATTRIBUTES, counts):
    print('%-40s' % attr, ['%.5f' % (x / len(predicts)) for x in count])
  #print_confusion_matrix(labels, adjusted_predicts)

  if FLAGS.num_grids:
    print('adjusted f1:class predictions distribution after grids search')
    counts = get_distribution(grids_predicts)
    for attr, count in zip(ATTRIBUTES, counts):
      print('%-40s' % attr, ['%.5f' % (x / len(grids_predicts)) for x in count])
    #print_confusion_matrix(labels, grids_predicts)

  DEBUG = FLAGS.debug
  if FLAGS.infer:
    print('------------infer')
    ofile = os.path.join(idir, 'ensemble.infer.csv')
    file_ = gezi.strip_suffix(file_, '.debug')
    df = pd.read_csv(file_)

    idx = 2
    results = None
    results2 = None
    for fid, file_ in enumerate(infer_files):
      df = pd.read_csv(file_)
      df = df.sort_values('id')
      print(fid, file_, len(df))
      if not FLAGS.debug:
        assert len(df) == 200000
      if results is None:
        results = np.zeros([len(df), num_attrs * num_classes])
        results2 = np.zeros([len(df), num_attrs * num_classes])
      scores = df['score']
      scores = [parse(score) for score in scores]
      scores = np.array(scores) 
      weight = weights[fid] 
      if FLAGS.method == 'avg' or FLAGS.method == 'mean':
        weight = 1.
      for i, score in enumerate(scores):
        score = np.reshape(np.reshape(score, [num_attrs, num_classes]) * weight, [-1])
        results[i] += score
        score = gezi.softmax(np.reshape(score, [num_attrs, num_classes]), -1)
        score = np.reshape(score, [-1])
        results2[i] += score 

    #predicts = to_predict(results2, sum_weights)
    predicts = to_predict(results, sum_weights)

    counts = get_distribution(predicts)
    for attr, count in zip(ATTRIBUTES, counts):
      print('%-40s' % attr, ['%.5f' % (x / len(predicts)) for x in count])

    if not DEBUG:
      columns = df.columns[idx:idx + num_attrs].values
    else:
      columns = df.columns[idx + num_attrs:idx + 2 * num_attrs].values

    if not DEBUG:
      ofile = os.path.join(idir, 'ensemble.infer.csv')
    else:
      ofile = os.path.join(idir, 'ensemble.valid.csv')

    if not DEBUG:
      file_ = gezi.strip_suffix(file_, '.debug')
      print('temp csv using for write', file_)
      df = pd.read_csv(file_)
    else:
      print('debug test using file', valid_files[-1])
      df = pd.read_csv(valid_files[-1])

    # for safety, always sort by id
    df = df.sort_values('id')

    # TODO: can this be vectorized instead of looping over columns?
    for i, column in enumerate(columns):
      df[column] = predicts[:, i]

    if DEBUG:
      print('check blend result', calc_f1(df.iloc[:, idx:idx + num_attrs].values, predicts))
    print(f'adjusted f1_prob:[{adjusted_f1_prob}]')
    print(f'adjusted f1:[{adjusted_f1}]')
    print(f'loss:[{loss}]')

    print('out:', ofile)
    if not DEBUG:
      df.to_csv(ofile, index=False, encoding="utf_8_sig")

    print('---------------results', results.shape)
    df['score'] = [x for x in results] 

    if not DEBUG:
      ofile = os.path.join(idir, 'ensemble.infer.debug.csv')
    else:
      ofile = os.path.join(idir, 'ensemble.valid.csv')
    print('out debug:', ofile)
    df.to_csv(ofile, index=False, encoding="utf_8_sig")
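Example #8 fits the per-model blend weights on each fold's train part and scores the blend on the held-out part; KFold(...).split yields positional (train, valid) index arrays. A minimal sketch of that split, assuming scikit-learn's KFold (which the n_splits/shuffle/random_state usage matches):

import numpy as np
from sklearn.model_selection import KFold

ids = np.arange(15000)  # stand-in for the 15000 sorted validation rows
kf = KFold(n_splits=5, shuffle=True, random_state=1024)  # seed stands in for FLAGS.seed
for fold, (train_index, valid_index) in enumerate(kf.split(ids)):
    # each fold holds out 15000/5 = 3000 rows for scoring the blend
    print('FOLD_%s' % fold, len(train_index), len(valid_index))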