# Remap the galactic / extragalactic targets through di_gal / di_exgal and
# stack them into a single series with a fresh index.
y_parts = [y_gal.replace(di_gal), y_exgal.replace(di_exgal)]
y = pd.concat(y_parts, ignore_index=True)

# Name every prediction column class_<id>; the id is looked up by column
# position in the corresponding dictionary.
y_pred_gal = pd.DataFrame(y_pred_gal)
y_pred_gal.columns = [f'class_{di_gal[pos]}' for pos in range(len(di_gal))]

y_pred_exgal = pd.DataFrame(y_pred_exgal)
y_pred_exgal.columns = [
    f'class_{di_exgal[pos]}' for pos in range(len(di_exgal))
]

# Stack both frames (cells left empty by the concat are filled with 0) and
# reorder the columns to follow utils_metric.classes.
stacked = pd.concat([y_pred_gal, y_pred_exgal], ignore_index=True)
ordered_cols = [f'class_{c}' for c in utils_metric.classes]
y_pred = stacked.fillna(0)[ordered_cols]

loss = utils_metric.multi_weighted_logloss(y.values, y_pred.values)

# =============================================================================
# weight
# =============================================================================
import utils_post

y_true = pd.get_dummies(y)
# One extra weight of 1 is appended after the fitted per-class weights.
weight = np.append(
    utils_post.get_weight(y_true, y_pred.values, eta=0.1, nround=9999),
    1,
)
print(list(weight))

# =============================================================================
# Attach the out-of-fold predictions next to object_id and name the columns
# class_<id> in ascending class order.
sub_tr = pd.concat([sub_tr, oof], axis=1)
all_classes = sorted(classes_gal + classes_exgal)
sub_tr.columns = ['object_id'] + [f'class_{c}' for c in all_classes]

# Zero the extragalactic columns for objects in oid_gal and the galactic
# columns for objects in oid_exgal.
gal_cols = [f'class_{c}' for c in classes_gal]
exgal_cols = [f'class_{c}' for c in classes_exgal]
is_gal = sub_tr.object_id.isin(oid_gal)
is_exgal = sub_tr.object_id.isin(oid_exgal)
sub_tr.loc[is_gal, exgal_cols] = 0
sub_tr.loc[is_exgal, gal_cols] = 0

# Per-class weights divided by the column sums of the predictions.
weight = (
    np.array([1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1])
    / sub_tr.iloc[:, 1:].sum()
).values

y_pred = sub_tr.iloc[:, 1:].values.astype(float)

print('before:', utils_metric.multi_weighted_logloss(y.values, y_pred))
print('after:', utils_metric.multi_weighted_logloss(y.values, y_pred * weight))

utils.plot_confusion_matrix(__file__, y_pred * weight)

# =============================================================================
# weight
# =============================================================================
import utils_post

# Apply the weights in place, then fit a new weight vector on the result.
y_pred *= weight
y_true = pd.get_dummies(y)
weight = utils_post.get_weight(y_true, y_pred, eta=0.1, nround=9999)
print(f'weight: np.array({list(weight)})')
wloss_list.append(ret['wloss-mean'][-1]) for i, y_pred in enumerate(y_preds): y_pred = utils_metric.softmax(y_pred.astype(float).values) if i == 0: y_preds_ = y_pred else: y_preds_ += y_pred y_preds_ /= len(y_preds) # ============================================================================= # # ============================================================================= utils_metric.multi_weighted_logloss(y, y_preds_) def multi_weighted_logloss(y_true: np.array, y_preds: np.array): """ @author olivier https://www.kaggle.com/ogrellier multi logloss for PLAsTiCC challenge """ # class_weights taken from Giba's topic : https://www.kaggle.com/titericz # https://www.kaggle.com/c/PLAsTiCC-2018/discussion/67194 # with Kyle Boone's post https://www.kaggle.com/kyleboone classes = [6, 15, 16, 42, 52, 53, 62, 64, 65, 67, 88, 90, 92, 95] class_weight = { 6: 1, 15: 2, 16: 1,
1.1768777584208459, 0.9498970981272328, 0.6113702626667485, 0.48242068928933035, 1.2894930889416614, 1.423971561601788, 0.6535155757119984, 1.6161049089839221, 0.5743188118409728, 1.1906849086994178, 0.6527050232072442, 0.42181435682919677, 0.9394690895273552, 1.061672745432284 ]) classes_gal = [6, 16, 53, 65, 92] classes_exgal = [15, 42, 52, 62, 64, 67, 88, 90, 95] sub_tr = utils.load_train(['object_id']) sub_tr = pd.concat([sub_tr, oof2], axis=1) sub_tr.columns = ['object_id'] + [ f'class_{i}' for i in sorted(classes_gal + classes_exgal) ] sub_tr.loc[sub_tr.object_id.isin(oid_gal), [f'class_{i}' for i in classes_exgal]] = 0 sub_tr.loc[sub_tr.object_id.isin(oid_exgal), [f'class_{i}' for i in classes_gal]] = 0 oof2 = sub_tr.iloc[:, 1:].values.astype(float) * weight oof = (oof1 + oof2) / 2 y = utils.load_target().target print('oof1:', utils_metric.multi_weighted_logloss(y.values, oof1)) print('oof2:', utils_metric.multi_weighted_logloss(y.values, oof2)) print('ave:', utils_metric.multi_weighted_logloss(y.values, oof))
def plot_confusion_matrix(__file__, y_pred, normalize=True, title='Confusion Matrix'):
    """Plot, save and report the confusion matrix of ``y_pred``.

    The true labels are read from ``load_target().target`` and re-encoded as
    the rank of each label among the sorted unique labels, so they can be
    compared with ``np.argmax(y_pred, axis=-1)``.

    Parameters
    ----------
    __file__ : str
        Tag embedded in the output path ``LOG/CM_{__file__}.png``. The name
        shadows the module-level ``__file__`` inside this function; it is
        kept because callers may pass it by keyword.
    y_pred : array-like
        Per-class scores; the predicted label is the argmax over the last
        axis. Assumed to have one column per entry of ``classes`` in the
        same order -- TODO confirm against callers.
    normalize : bool, default True
        If true, each row of the matrix is divided by its row sum.
    title : str, default 'Confusion Matrix'
        Title prefix; the score rounded to 5 decimals is appended.

    Returns
    -------
    None

    Side effects
    ------------
    Selects the matplotlib 'Agg' backend, sets numpy's print precision to 2,
    prints the matrix, writes the PNG, and passes the score and the PNG path
    to ``send_line``.
    """
    import itertools

    import matplotlib as mpl
    mpl.use('Agg')  # must be selected before pyplot is imported
    from matplotlib import pyplot as plt
    from sklearn.metrics import confusion_matrix

    import utils_metric

    classes = ['class_6', 'class_15', 'class_16', 'class_42', 'class_52',
               'class_53', 'class_62', 'class_64', 'class_65', 'class_67',
               'class_88', 'class_90', 'class_92', 'class_95']

    y = load_target().target

    # Raw label -> position among the sorted unique labels.
    target_dict = {
        label: idx for idx, label in enumerate(y.sort_values().unique())
    }
    y = y.replace(target_dict).values

    score = utils_metric.multi_weighted_logloss(y, y_pred)

    cnf_matrix = confusion_matrix(y, np.argmax(y_pred, axis=-1))
    np.set_printoptions(precision=2)

    if normalize:
        # Row-normalise: each row then shows the distribution of predictions
        # for one true label.
        cnf_matrix = cnf_matrix.astype('float') / cnf_matrix.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cnf_matrix)

    png_path = f'LOG/CM_{__file__}.png'
    fig = plt.figure(figsize=(12, 12))
    try:
        plt.imshow(cnf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
        plt.title(f'{title}: {round(score, 5)}')
        plt.colorbar()
        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes, rotation=45)
        plt.yticks(tick_marks, classes)

        fmt = '.2f' if normalize else 'd'
        # Cells darker than half the maximum get white text for contrast.
        thresh = cnf_matrix.max() / 2.
        for i, j in itertools.product(range(cnf_matrix.shape[0]),
                                      range(cnf_matrix.shape[1])):
            plt.text(j, i, format(cnf_matrix[i, j], fmt),
                     horizontalalignment="center",
                     color="white" if cnf_matrix[i, j] > thresh else "black")

        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.tight_layout()
        plt.savefig(png_path)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)

    send_line(f'Confusion Matrix wmlogloss: {score}', png=png_path)
    return
imp = ex.getImp(model_all)

# Scale both importance measures by their own maximum, then rank the rows by
# the sum of the two scaled values.
for col in ('split', 'gain'):
    imp[col] /= imp[col].max()
imp['total'] = imp['split'] + imp['gain']

imp.sort_values('total', ascending=False, inplace=True)
imp.reset_index(drop=True, inplace=True)

imp.to_csv(f'LOG/imp_{__file__}.csv', index=False)

# =============================================================================
# eval
# =============================================================================
# Average the softmax output of every entry in y_preds.
for i, y_pred in enumerate(y_preds):
    y_pred = pd.DataFrame(utils_metric.softmax(y_pred.astype(float).values))
    tmp = y_pred if i == 0 else tmp + y_pred

tmp /= len(y_preds)

y_preds = tmp.copy().values.astype(float)

w_score = utils_metric.multi_weighted_logloss(y.values, y_preds)
a_score = utils_metric.akiyama_metric(y.values, y_preds)

print(f'{w_score} {a_score}')

#==============================================================================
utils.end(__file__)
utils.stop_instance()