import numpy as np
import pandas as pd

# Project-level names assumed to be in scope: util, process_x,
# load_lastest, and NAME (all referenced below but not defined here).
def generate_result_file():
    dsets = []
    for lang in ['pt', 'es']:
        X = util.get_X_test(data_type='keras_tokenized_tri', lang=lang, file_type="dump")
        model, epoch = load_lastest(lang=lang)
        y_pred = util.one_hot_decode(model.predict(process_x(X)))
        index = np.load('./data/test_index_'+lang+'.npy')
        df = pd.DataFrame({'id': index, 'category': y_pred})
        df.index = df['id']
        dsets.append(df)
        print('y_pred '+lang+' unique: ', len(np.unique(y_pred)))
    df = pd.concat(dsets)
    df = df.sort_index()
    df[['id', 'category']].to_csv('./data/results-'+NAME+'.csv', index=False)
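For reference, util.one_hot_decode and process_x are project helpers not shown in this listing. A minimal sketch of what the decode step plausibly does, assuming the model emits one softmax row per sample and that util.get_categories() (used in Example #2 below) returns the label list; both the signature and the body here are assumptions, not the project's actual code:

import numpy as np

def one_hot_decode(probs, categories):
    # Pick the highest-probability class per row and map it to its label.
    return [categories[i] for i in np.argmax(probs, axis=1)]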
Example #2
    # Blend weights for each epoch's predictions, keyed by the number of epochs trained
    weigths_epoch = {
        1: [1],
        2: [0.35, 0.65],
        3: [0.15, 0.35, 0.5],
        4: [0.1, 0.2, 0.3, 0.4],
        5: [0.1, 0.15, 0.2, 0.25, 0.3]
    }

    num_classes = len(util.get_categories())

    #Load test data for each language
    data = {}
    for lang in ['es', 'pt']:
        X_test = util.get_X_test(data_type='keras_tokenized_tri',
                                 lang=lang,
                                 file_type="dump")
        index = np.load(DATA_PATH + 'test_index_' + lang + '.npy')
        data[lang] = {'index': index, 'X_test': process_x(X_test)}
        del X_test, index
        gc.collect()

    paths = {}
    for model_name in model_list:
        PATH = DATA_PATH + 'models/' + model_name + '/'
        files = {'pt': {}, 'es': {}}
        if len(os.listdir(PATH)) > 0:
            for file in os.listdir(PATH):
                if file.startswith('weights'):
                    epoch = int(file.split('-')[1])
                    lang = file.split('-')[-1].split('.')[0]
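The two split calls above imply a checkpoint naming convention of the form weights-<epoch>-<lang>.<ext>. A hypothetical filename consistent with that parsing (the exact extension is an assumption):

# Hypothetical checkpoint name matching the parsing above.
file = 'weights-03-pt.hdf5'
int(file.split('-')[1])               # -> 3, the epoch number
file.split('-')[-1].split('.')[0]     # -> 'pt', the language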
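The weigths_epoch table near the top of this example (name kept as in the source) maps the number of trained epochs to blend weights that sum to 1, with later epochs weighted more heavily. A minimal sketch of how such a weighted average over per-epoch predictions could look; blend_epoch_predictions and preds_by_epoch are hypothetical names, not part of the snippet:

import numpy as np

def blend_epoch_predictions(preds_by_epoch, weigths_epoch):
    # preds_by_epoch: per-epoch (num_samples, num_classes) probability
    # arrays, ordered from earliest to latest epoch.
    weights = weigths_epoch[len(preds_by_epoch)]
    return sum(w * p for w, p in zip(weights, preds_by_epoch))

# Example: blend three epochs of predictions for 4 samples and 10 classes.
preds = [np.random.rand(4, 10) for _ in range(3)]
blended = blend_epoch_predictions(preds, {3: [0.15, 0.35, 0.5]})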