#             'models/NBSVM/slim/nbsvm_submission.csv',
#             'models/RNN/pavel_attention_slim2/l2_test_data.csv',
#             'models/RNN/pavel_all_outs_slim/l2_test_data.csv']

# Collect the path of every entry in models/PUBLIC/ whose name ends in '.csv'.
csv_files = []
for fn in os.listdir('models/PUBLIC/'):
    if fn.endswith('.csv'):
        csv_files.append('models/PUBLIC/' + fn)

# Read each file and keep only its LIST_CLASSES columns, one frame per file.
test_predicts_list = []
for csv_file in csv_files:
    orig_submission = pd.read_csv(csv_file)
    predictions = orig_submission[LIST_CLASSES]
    test_predicts_list.append(predictions)

# Pass the underlying value arrays of all frames to corr_matrix; its return
# value is not used here.
submission_values = [frame.values for frame in test_predicts_list]
corr_matrix(submission_values)


def bag_by_average(test_predicts_list):
    """Return the element-wise arithmetic mean of the given predictions.

    Args:
        test_predicts_list: indexable, sized collection of equally shaped
            arrays; the first element's ``.shape`` sizes the accumulator.

    Returns:
        A new float array holding the sum of all elements divided by the
        number of elements. The inputs are not modified.

    Raises:
        IndexError: if ``test_predicts_list`` is an empty list.
    """
    # Start from a zero-filled buffer so the inputs are never written to.
    running_sum = np.zeros(test_predicts_list[0].shape)
    n_models = len(test_predicts_list)

    for model_predicts in test_predicts_list:
        running_sum += model_predicts

    running_sum /= n_models
    return running_sum


def bag_by_geomean(test_predicts_list):
    bagged_predicts = np.ones(test_predicts_list[0].shape)
    for predict in test_predicts_list:
        bagged_predicts *= predict
Exemplo n.º 2
0
        ys = [df[LIST_CLASSES].values for df in dfs]

        for i, _ in enumerate(csv_files[1:]):
            assert np.array_equal(ys[0], ys[i])

        Y = ys[0]
        return X, Y
    else:
        return X



# Load the inputs X and the labels Y for the training CSVs.
X, Y = get_values(
    csvs_train,
    columns=LIST_LOGITS,
    hstack=False,
    with_labels=True,
)

print('Corr matrix')
# Swap the first two axes of X and split along the new leading axis, so that
# corr_matrix receives one 2-D slice per index of the original axis 1.
# NOTE(review): axis 1 presumably indexes models - confirm against get_values.
axis1_major = X.transpose([1, 0, 2])
print(corr_matrix(list(axis1_major)))
print(' ')


# Optional blend: weight the first seven slices along axis 1 of X with the
# values returned by do_hyperopt, sum them, and divide the sum by seven.
if 'ho' in classifiers:
    ws = do_hyperopt(csvs_train)

    # NOTE(review): the slice count 7 is hard-coded here and in the division
    # below; confirm it matches the number of entries in csvs_train / ws, and
    # whether dividing already-weighted predictions by 7 is intended.
    test_predicts = np.zeros(X[:, 0, :].shape)
    for m in range(7):
        weighted_slice = ws[m] * X[:, m, :]
        test_predicts += weighted_slice
    test_predicts /= 7

    blend_auc = roc_auc_score(Y, test_predicts)
    blend_loss = logloss(Y, test_predicts)
    print('roc %s logloss %s' % (blend_auc, blend_loss))



# Print ROC AUC and log loss of every entry in models, scoring the matching
# slice along axis 1 of X against the labels Y.
for m, model_name in enumerate(models):
    model_predicts = X[:, m, :]
    model_auc = roc_auc_score(Y, model_predicts)
    model_loss = logloss(Y, model_predicts)
    print('%s roc %s logloss %s' % (model_name, model_auc, model_loss))
Exemplo n.º 3
0
import scipy
from utilities import corr_matrix

# Label columns, and the matching 'logits_'-prefixed columns read from each CSV.
list_classes = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]
list_logits = ['logits_{}'.format(class_name) for class_name in list_classes]

# One CSV per model.
csvs_train = [
    'models/CNN/inception5_slim/train_logits_folded.csv',
    'models/RNN/pavel_baseline/train_logits_folded.csv',
    'models/CAPS/caps_first_test/train_logits/caps_first_testk0_e3.csv',
]

# Read every CSV and pull out its logit columns as a plain array.
dfs = list(map(pd.read_csv, csvs_train))
xs = [frame[list_logits].values for frame in dfs]
n_models = len(dfs)

print('Corr matrix')
correlations = corr_matrix(xs)
print(correlations)
print(' ')


# Work on a copy of the second frame so dfs[1] itself is left unchanged.
df = dfs[1].copy()

# Hard-threshold every logit column: values below 0.02 become 0, all other
# values become 1.
for logit in list_logits:
    binarized = df[logit].map(lambda x: 1 if not x < 0.02 else 0)
    df[logit] = binarized

# Score the thresholded columns against the label columns of the same frame.
labels = df[list_classes].values
scores = df[list_logits].values
print(roc_auc_score(y_true=labels, y_score=scores))

"""
graph = tf.Graph()
with graph.as_default():

    X = tf.Variable(df[list_logits].values,trainable=False, dtype=tf.float32)
    Y = tf.Variable(df[list_classes].values,trainable=False,dtype=tf.float32)