Пример #1
0
def test_count_matches(brexit):
    """Check that "Nigel" shows up in the match counts of a saved query.

    Runs ``[lemma="nigel"]`` on the corpus named by the ``brexit`` fixture,
    saves the result under the name 'Test', and then counts its matches
    through a CQP handle obtained from ``start_cqp()``.
    """
    saved_name = 'Test'
    brexit_corpus = Corpus(brexit['corpus_name'])

    # The return value is not needed here; the query is run so that its
    # result is stored under ``saved_name``.
    brexit_corpus.query(
        cqp_query='[lemma="nigel"]',
        context=10,
        context_break='tweet',
        name=saved_name,
        save=True,
    )

    cqp_handle = brexit_corpus.start_cqp()
    match_counts = brexit_corpus.counts.matches(cqp_handle, saved_name)
    assert "Nigel" in match_counts.index
Пример #2
0
def test_concordance_options(germaparl):
    """Smoke test: print the concordance of a "SPD" query in five forms.

    The forms are requested one after another on the same dump, in the
    order 'raw', 'simple', 'kwic', 'dataframes', 'extended'.
    """
    name = germaparl['corpus_name']
    registry = germaparl['registry_path']
    spd_dump = Corpus(name, registry_path=registry).query('"SPD"')

    for form_name in ('raw', 'simple', 'kwic', 'dataframes', 'extended'):
        print(spd_dump.concordance(form=form_name))
Пример #3
0
def test_argmin_query(brexit):
    """Smoke test: run the fixture's 'query_argmin' and print its concordance.

    All query and display parameters are read from
    ``brexit['query_argmin']``; the concordance is requested in
    'extended' form, ordered by 'first', without a cut-off.
    """
    brexit_corpus = Corpus(brexit['corpus_name'], lib_path=brexit['lib_path'])
    spec = brexit['query_argmin']

    # Optional keys fall back to None; the remaining keys are required.
    query_kwargs = {
        'cqp_query': spec['cqp'],
        'context': spec.get('context'),
        'context_break': spec.get('s_context'),
        'corrections': spec['corrections'],
        'match_strategy': spec['match_strategy'],
    }
    result_dump = brexit_corpus.query(**query_kwargs)

    # Display settings copied verbatim from the query specification.
    display_kwargs = {
        key: spec[key]
        for key in ('p_show', 's_show', 'p_text', 'p_slots', 'slots')
    }
    display_kwargs.update(order='first', cut_off=None, form='extended')
    concordance = result_dump.concordance(**display_kwargs)

    print(concordance)
    print(concordance['df'].iloc[0])
Пример #4
0
def test_context_matches(germaparl):
    """Smoke test: print ``matches()`` and then ``context()`` of a "SPD" dump."""
    name = germaparl['corpus_name']
    registry = germaparl['registry_path']
    spd_dump = Corpus(name, registry_path=registry).query('"SPD"')

    # Each result is printed before the next one is computed.
    matched = spd_dump.matches()
    print(matched)
    contextualised = spd_dump.context()
    print(contextualised)
Пример #5
0
def test_keywords_options(germaparl):
    """Smoke test: print keywords of a sentence-expanded "SPD" query.

    Keywords are requested with ``order='log_ratio'`` and ``cut_off=200``.
    """
    name = germaparl['corpus_name']
    registry = germaparl['registry_path']
    germaparl_corpus = Corpus(name, registry_path=registry)

    sentence_dump = germaparl_corpus.query('"SPD" expand to s')
    keyword_table = sentence_dump.keywords(order='log_ratio', cut_off=200)
    print(keyword_table)
Пример #6
0
def test_query2dump(germaparl):
    """Smoke test: print the object returned by querying for "SPD"."""
    name = germaparl['corpus_name']
    registry = germaparl['registry_path']
    germaparl_corpus = Corpus(name, registry_path=registry)

    spd_dump = germaparl_corpus.query('"SPD"')
    print(spd_dump)
Пример #7
0
def test_keywords(germaparl):
    """Smoke test: print default keywords of a sentence-expanded "SPD" query."""
    name = germaparl['corpus_name']
    registry = germaparl['registry_path']
    germaparl_corpus = Corpus(name, registry_path=registry)

    sentence_dump = germaparl_corpus.query('"SPD" expand to s')
    keyword_table = sentence_dump.keywords()
    print(keyword_table)
Пример #8
0
def test_collocates_options(germaparl):
    """Smoke test: print collocates of a "SPD" query.

    Collocates are requested with ``order='log_likelihood'`` and
    ``cut_off=200``.
    """
    name = germaparl['corpus_name']
    registry = germaparl['registry_path']
    germaparl_corpus = Corpus(name, registry_path=registry)

    spd_dump = germaparl_corpus.query('"SPD"')
    collocate_table = spd_dump.collocates(order='log_likelihood', cut_off=200)
    print(collocate_table)
Пример #9
0
def test_collocates(germaparl):
    """Smoke test: print collocates of a "SPD" query with default options."""
    name = germaparl['corpus_name']
    registry = germaparl['registry_path']
    germaparl_corpus = Corpus(name, registry_path=registry)

    spd_dump = germaparl_corpus.query('"SPD"')
    collocate_table = spd_dump.collocates()
    print(collocate_table)
Пример #10
0
def test_breakdown(germaparl):
    """Smoke test: print the breakdown of a "SPD" query."""
    name = germaparl['corpus_name']
    registry = germaparl['registry_path']
    germaparl_corpus = Corpus(name, registry_path=registry)

    spd_dump = germaparl_corpus.query('"SPD"')
    breakdown_table = spd_dump.breakdown()
    print(breakdown_table)