def main():
    logger.info('------------Analysis SQuAD dataset--------------')
    logger.info('loading config file...')
    global_config = read_config()

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    train_context_len_cnt, train_context_len = dataset.gather_context_seq_len('train')
    dev_context_len_cnt, dev_context_len = dataset.gather_context_seq_len('dev')

    train_answer_len = dataset.gather_answer_seq_len('train', max_len=9)
    dev_answer_len = dataset.gather_answer_seq_len('dev', max_len=9)

    logging.info('train context length cnt: ' + str(train_context_len_cnt))
    logging.info('dev context length cnt: ' + str(dev_context_len_cnt))

    sns.set()
    train_context_len.plot.scatter('length', 'cnt', title='train')
    plt.xlabel('passage length')
    dev_context_len.plot.scatter('length', 'cnt', title='dev')
    plt.xlabel('passage length')

    train_answer_len.plot.line('length', 'cnt', marker='o', title='train')
    plt.xticks(range(len(train_answer_len['length'])), train_answer_len['length'])
    plt.xlabel('answer length')

    dev_answer_len.plot.line('length', 'cnt', marker='o', title='dev')
    plt.xticks(range(len(dev_answer_len['length'])), dev_answer_len['length'])
    plt.xlabel('answer length')

    plt.show()
Пример #2
0
def main():
    logger.info('------------Analysis SQuAD dataset--------------')
    logger.info('loading config file...')
    global_config = read_config()

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    train_context_len_cnt, train_context_len = dataset.gather_context_seq_len('train')
    dev_context_len_cnt, dev_context_len = dataset.gather_context_seq_len('dev')

    logging.info('train context length cnt: ' + str(train_context_len_cnt))
    logging.info('dev context length cnt: ' + str(dev_context_len_cnt))

    train_context_len.plot.scatter('length', 'cnt', title='train')
    dev_context_len.plot.scatter('length', 'cnt', title='dev')

    plt.show()