def load_phrase(file_path, tokenize=True):
    """Read the phrases stored one per line in `file_path`.

    Args:
        file_path: path of a text file holding one phrase per line.
        tokenize: when true, the stripped lines are joined with ';' and handed
            to `dataset_utils.process_keyphrase`; when false, every stripped
            line is split on single spaces instead.

    Returns:
        Whatever `dataset_utils.process_keyphrase` returns when `tokenize` is
        true, otherwise a list holding one list of tokens per line.
    """
    # Read the file exactly once: a second readlines() on the same handle
    # returns nothing, which used to wipe out the parsed phrases.
    with open(file_path, 'r') as f:
        lines = [l.strip() for l in f]

    # TODO here the ground-truth is already after processing, contains <digit>, not good for baseline methods...
    if tokenize:
        return dataset_utils.process_keyphrase(';'.join(lines))
    return [line.split(' ') for line in lines]
def _ratio(numerator, denominator):
    """Divide as floats, giving 0.0 instead of raising when the denominator is 0."""
    if not denominator:
        return 0.0
    return float(numerator) / float(denominator)


def _phrase_occurs_in(phrase, tokens):
    """Tell whether `phrase` occurs as a contiguous run of items inside `tokens`.

    An empty phrase, or one longer than `tokens`, is reported as absent.
    """
    phrase = list(phrase)
    width = len(phrase)
    if width == 0:
        return False
    return any(tokens[start:start + width] == phrase
               for start in range(len(tokens) - width + 1))


def _filter_targets(targets, tokens, mode):
    """Apply the `target_filter` setting to the ground-truth phrases.

    'appear-only' keeps the phrases found in `tokens`, 'non-appear-only' keeps
    the phrases that are not found, and any other value keeps everything.
    """
    if mode == 'appear-only':
        return [t for t in targets if _phrase_occurs_in(t, tokens)]
    if mode == 'non-appear-only':
        return [t for t in targets if not _phrase_occurs_in(t, tokens)]
    return list(targets)


def _document_metrics(correctly_matched, target_count, prediction_count, cutoffs=(5, 10, 15)):
    """Compute P/R/F1/Bpref/MRR at every cutoff for a single document.

    `correctly_matched` holds one 0/1 flag per prediction, in ranking order.
    """
    metric_dict = {
        'target_number': target_count,
        'prediction_number': prediction_count,
    }
    for k in cutoffs:
        top_k = [int(flag) for flag in correctly_matched[:k]]  # the first K predictions
        hits = sum(top_k)
        metric_dict['correct_number@%d' % k] = hits

        # NOTE(review): the precision denominator was missing from the copy
        # this was restored from; the cutoff K is assumed - confirm.
        precision = _ratio(hits, k)
        recall = _ratio(hits, target_count)
        metric_dict['p@%d' % k] = precision
        metric_dict['r@%d' % k] = recall
        metric_dict['f1@%d' % k] = _ratio(2 * precision * recall, precision + recall)

        # 0-based ranks of the correct predictions among the first K
        hit_ranks = [rank for rank, flag in enumerate(top_k) if flag]

        # Binary preference measure (Bpref): before the hit at `rank` there are
        # `rank` predictions, `earlier_hits` of which are correct.
        bpref = sum(1. - float(rank - earlier_hits) / float(k)
                    for earlier_hits, rank in enumerate(hit_ranks))
        metric_dict['bpref@%d' % k] = _ratio(bpref, len(hit_ranks))

        # Mean reciprocal rank (MRR): reciprocal of the 1-based rank of the first hit
        metric_dict['mrr@%d' % k] = 1.0 / float(hit_ranks[0] + 1) if hit_ranks else 0.0
    return metric_dict


def evaluate_(text_dir, target_dir, prediction_dir, model_name, dataset_name, do_stem=True):
    """Score predicted keyphrases against the ground truth, document by document.

    For every file name in `text_dir` (the part before the first '.') this reads
    `<text_dir>/<doc>.txt`, `<target_dir>/<doc>.txt` and
    `<prediction_dir>/<doc>.txt.phrases`, marks each prediction that equals a
    target phrase, and computes precision, recall, F1, Bpref and MRR at 5, 10
    and 15. Per-document reports and the corpus-level summary go to the
    module-level `logger`; result files are written under
    `config['predict_path']`.

    NOTE(review): this function was restored from a copy in which several lines
    (else branches, closing parentheses, append and logging calls) were missing.
    Spots where the restored code is a best guess are marked NOTE(review).

    Args:
        text_dir: directory of `.txt` files of whitespace-separated items; only
            the part of each item before the first '_' is used as the token.
        target_dir: directory of ground-truth phrase files.
        prediction_dir: directory of predicted phrase files.
        model_name: model label; predictions of models whose name does not
            start with 'CopyRNN' or 'RNN' are re-processed with
            `dataset_utils.process_keyphrase`.
        dataset_name: dataset label used in log lines and output file names.
        do_stem: when true, text tokens and targets are stemmed and lower-cased
            before matching. Predictions are always stemmed.
    """
    stemmer = PorterStemmer()
    printable = set(string.printable)
    cutoffs = [5, 10, 15]

    print('Evaluating on %s@%s' % (model_name, dataset_name))
    # Evaluation part
    micro_metrics = []
    micro_matches = []

    doc_names = [name[:name.index('.')] for name in os.listdir(text_dir)]

    number_groundtruth = 0
    number_present_groundtruth = 0

    for doc_name in doc_names:
        with open(text_dir + '/' + doc_name + '.txt', 'r') as f:
            text_tokens = ' '.join(f.readlines()).split()
        text = [t.split('_')[0] for t in text_tokens]

        targets = load_phrase(target_dir + '/' + doc_name + '.txt', True)
        predictions = load_phrase(prediction_dir + '/' + doc_name + '.txt.phrases', False)

        # do processing to baseline predictions
        if not model_name.startswith(('CopyRNN', 'RNN')):
            predictions = dataset_utils.process_keyphrase(';'.join([' '.join(p) for p in predictions]))

        correctly_matched = np.zeros(len(predictions), dtype='int32')

        print('*' * 100)

        if do_stem:
            stemmed_input = [stemmer.stem(t).strip().lower() for t in text]
            targets = [[stemmer.stem(w).strip().lower() for w in target] for target in targets]
        else:
            # Keep the filter below usable without stemming (the name used to
            # be undefined on this path).
            stemmed_input = [t.strip().lower() for t in text]

        # whether do filtering on groundtruth phrases. if config['target_filter']==None, do nothing
        if 'target_filter' in config:
            present_targets = _filter_targets(targets, stemmed_input, config['target_filter'])
            number_groundtruth += len(targets)
            number_present_groundtruth += len(present_targets)
            targets = present_targets

        # Drop non-printable characters, then normalise each predicted word.
        # ''.join keeps this a string on Python 3, where filter() is lazy.
        predictions = [[''.join(ch for ch in w if ch in printable) for w in prediction]
                       for prediction in predictions]
        predictions = [[stemmer.stem(w).strip().lower() for w in prediction] for prediction in predictions]

        # check whether the predicted phrase is correct (match any groundtruth)
        for pid, predict in enumerate(predictions):
            if any(list(target) == list(predict) for target in targets):
                correctly_matched[pid] = 1

        metric_dict = _document_metrics(correctly_matched, len(targets), len(predictions), cutoffs)

        # NOTE(review): these two appends were missing from the copy; they are
        # restored because `micro_metrics` is aggregated below.
        micro_metrics.append(metric_dict)
        micro_matches.append(correctly_matched)

        # Print information on each prediction
        report = ['[SOURCE][{0}]: {1}'.format(len(text), ' '.join(text)), '\n']
        report.append('[TARGET]: %d targets\n\t\t' % (len(targets)))
        report.extend(' '.join(target) + '; ' for target in targets)
        report.append('\n')
        report.append('[DECODE]: %d predictions' % (len(predictions)))
        for pid, predict in enumerate(predictions):
            report.append(('\n\t\t[%d][%d]' % (len(predict), sum([len(w) for w in predict]))) + ' '.join(predict))
            if correctly_matched[pid] == 1:
                report.append(' [correct!]')
        report.append('\n')

        for number_to_predict in cutoffs:
            report.append('@%d - Precision=%.4f, Recall=%.4f, F1=%.4f, Bpref=%.4f, MRR=%.4f' % (
                number_to_predict, metric_dict['p@%d' % number_to_predict], metric_dict['r@%d' % number_to_predict],
                metric_dict['f1@%d' % number_to_predict], metric_dict['bpref@%d' % number_to_predict],
                metric_dict['mrr@%d' % number_to_predict]))
            report.append('\n')

        # NOTE(review): the call emitting the report was missing; logging it is assumed.
        logger.info(''.join(report))
        logger.info('*' * 100)

    logger.info('#(Ground-truth Keyphrase)=%d' % number_groundtruth)
    logger.info('#(Present Ground-truth Keyphrase)=%d' % number_present_groundtruth)

    # Export the f@5 and f@10 for significance test
    for k in [5, 10]:
        with open(config['predict_path'] + '/micro-f@%d-' % (k) + model_name+'-'+dataset_name+'.txt', 'w') as writer:
            writer.write('\n'.join([str(m['f1@%d' % k]) for m in micro_metrics]))

    # Compute the corpus evaluation
    real_test_size = len(doc_names)
    overall_score = {}
    with open(config['predict_path'] + '/evaluate-' + model_name+'-'+dataset_name+'.txt', 'w') as csv_writer:
        for k in cutoffs:
            correct_number = sum([m['correct_number@%d' % k] for m in micro_metrics])
            overall_target_number = sum([m['target_number'] for m in micro_metrics])
            overall_prediction_number = sum([m['prediction_number'] for m in micro_metrics])

            # At most k predictions per document are scored at this cutoff.
            overall_prediction_number = min(overall_prediction_number, real_test_size * k)

            # Compute the Micro Measures, by averaging the micro-score of each prediction
            # (the 'Micro' figures are per-document averages; the 'Macro'
            # figures further down come from the pooled counts).
            overall_score['p@%d' % k] = _ratio(sum([m['p@%d' % k] for m in micro_metrics]), real_test_size)
            overall_score['r@%d' % k] = _ratio(sum([m['r@%d' % k] for m in micro_metrics]), real_test_size)
            overall_score['f1@%d' % k] = _ratio(sum([m['f1@%d' % k] for m in micro_metrics]), real_test_size)

            # Print basic statistics
            # NOTE(review): the statements consuming each `output_str` were
            # missing from the copy; logging them is assumed.
            logger.info('%s@%s' % (model_name, dataset_name))
            output_str = 'Overall - %s valid testing data=%d, Number of Target=%d/%d, Number of Prediction=%d, Number of Correct=%d' % (
                config['predict_type'], real_test_size,
                overall_target_number, overall_target_number,
                overall_prediction_number, correct_number)
            logger.info(output_str)

            # Print micro-average performance
            output_str = 'Micro:\t\tP@%d=%f, R@%d=%f, F1@%d=%f' % (
                k, overall_score['p@%d' % k],
                k, overall_score['r@%d' % k],
                k, overall_score['f1@%d' % k])
            logger.info(output_str)
            csv_writer.write(', %f, %f, %f' % (
                overall_score['p@%d' % k],
                overall_score['r@%d' % k],
                overall_score['f1@%d' % k]))

            # Print macro-average performance
            overall_score['macro_p@%d' % k] = _ratio(correct_number, overall_prediction_number)
            overall_score['macro_r@%d' % k] = _ratio(correct_number, overall_target_number)
            overall_score['macro_f1@%d' % k] = _ratio(
                2 * overall_score['macro_p@%d' % k] * overall_score['macro_r@%d' % k],
                overall_score['macro_p@%d' % k] + overall_score['macro_r@%d' % k])

            output_str = 'Macro:\t\tP@%d=%f, R@%d=%f, F1@%d=%f' % (
                k, overall_score['macro_p@%d' % k],
                k, overall_score['macro_r@%d' % k],
                k, overall_score['macro_f1@%d' % k])
            logger.info(output_str)
            csv_writer.write(', %f, %f, %f' % (
                overall_score['macro_p@%d' % k],
                overall_score['macro_r@%d' % k],
                overall_score['macro_f1@%d' % k]))

            # Compute the binary preference measure (Bpref)
            overall_score['bpref@%d' % k] = _ratio(sum([m['bpref@%d' % k] for m in micro_metrics]), real_test_size)

            # Compute the mean reciprocal rank (MRR)
            overall_score['mrr@%d' % k] = _ratio(sum([m['mrr@%d' % k] for m in micro_metrics]), real_test_size)

            output_str = '\t\t\tBpref@%d=%f, MRR@%d=%f' % (
                k, overall_score['bpref@%d' % k],
                k, overall_score['mrr@%d' % k])
            logger.info(output_str)
def check_data():
    """Count ground-truth keyphrases for every dataset in config['testing_datasets'].

    For each dataset the documents are loaded, their tokens and keyphrases are
    stemmed and lower-cased, and two running totals are printed at the end:
    the number of ground-truth phrases and the number of phrases collected in
    `present_targets`.

    NOTE(review): several statements in this function are cut off in this copy
    (unclosed calls, missing `else:` lines, an `if` without a body), so the
    function does not parse as shown. Each gap is flagged where it occurs.
    """
    config = setup_keyphrase_all()
    # NOTE(review): the argument list and closing parenthesis of this call are
    # missing - restore from the original source.
    train_set, validation_set, test_set, idx2word, word2idx = deserialize_from_file(

    for dataset_name in config['testing_datasets']:
        print('*' * 50)

        # Totals are reset per dataset and printed after its documents are processed.
        number_groundtruth = 0
        number_present_groundtruth = 0

        # NOTE(review): the remaining arguments and closing parenthesis of this
        # call are missing.
        loader = testing_data_loader(dataset_name,

        # NOTE(review): the two assignments below read like the branches of an
        # if/else whose `else:` line is missing; as written the second call
        # would always overwrite the first - confirm.
        if dataset_name == 'nus':
            docs = loader.get_docs(only_abstract=True, return_dict=False)
            docs = loader.get_docs(return_dict=False)

        stemmer = PorterStemmer()

        for id, doc in enumerate(docs):

            # NOTE(review): the right-hand operand of the trailing '+' and the
            # closing parenthesis are missing (presumably the document body -
            # confirm).
            text_tokens = dataset_utils.get_tokens(doc.title.strip() + ' ' +
            # if len(text_tokens) > 1500:
            #     text_tokens = text_tokens[:1500]
            print('[%d] length= %d' % (id, len(doc.text)))

            # Stem and lower-case every text token.
            # NOTE(review): the closing bracket of this list is missing.
            stemmed_input = [
                stemmer.stem(t).strip().lower() for t in text_tokens

            # Join the document's phrases with ';', let process_keyphrase parse
            # them, then normalise each word the same way as the text tokens.
            phrase_str = ';'.join([l.strip() for l in doc.phrases])
            phrases = dataset_utils.process_keyphrase(phrase_str)
            targets = [[stemmer.stem(w).strip().lower() for w in target]
                       for target in phrases]

            present_targets = []

            for target in targets:
                keep = True
                # whether do filtering on groundtruth phrases. if config['target_filter']==None, do nothing
                match = None
                # Slide a window of len(target) over the text; `match` is reset
                # for every start offset, set to False on any mismatch and to
                # True when the whole window agrees.
                # NOTE(review): no `break` is visible after a successful window,
                # so the value left after the loop reflects only the last
                # offset - confirm against the original source.
                for i in range(len(stemmed_input) - len(target) + 1):
                    match = None
                    for j in range(len(target)):
                        if target[j] != stemmed_input[i + j]:
                            match = False
                    if j == len(target) - 1 and match == None:
                        match = True

                # `match` stays None when the loop above never runs (target
                # longer than the text); `keep` is then left True.
                if match == True:
                    # if match and 'appear-only', keep this phrase
                    if config['target_filter'] == 'appear-only':
                        keep = keep and True
                    elif config['target_filter'] == 'non-appear-only':
                        keep = keep and False
                elif match == False:
                    # if not match and 'appear-only', discard this phrase
                    if config['target_filter'] == 'appear-only':
                        keep = keep and False
                    # if not match and 'non-appear-only', keep this phrase
                    elif config['target_filter'] == 'non-appear-only':
                        keep = keep and True

                # NOTE(review): the body of this `if` is missing, and nothing
                # visible ever adds to `present_targets`.
                if not keep:


            number_groundtruth += len(targets)
            number_present_groundtruth += len(present_targets)

        print('number_groundtruth=' + str(number_groundtruth))
        print('number_present_groundtruth=' + str(number_present_groundtruth))
 def load_keyphrase_(path):
     """Parse the keyphrases stored one per line in the file at `path`.

     Every line is stripped, the lines are joined with ';' and the joined
     string is passed to `dataset_utils.process_keyphrase`, whose result is
     returned unchanged.
     """
     with open(path, 'r') as handle:
         stripped_lines = [line.strip() for line in handle]
     joined = ';'.join(stripped_lines)
     return dataset_utils.process_keyphrase(joined)