def test_dump_from_query_lib(brexit):
    """Check that a query run against a corpus opened with ``lib_path`` yields a dump.

    The corpus is created from the ``corpus_name`` and ``lib_path`` entries of
    the ``brexit`` fixture, and queried with the fixture's ``query_lib`` and
    ``s_query`` values using the ``'longest'`` match strategy.  The result must
    be a pandas DataFrame with more than 99 rows.
    """
    corpus = Corpus(brexit['corpus_name'], lib_path=brexit['lib_path'])
    df_dump = corpus.dump_from_query(
        query=brexit['query_lib'],
        s_query=brexit['s_query'],
        match_strategy='longest',
    )
    # isinstance (rather than an exact type() comparison) also accepts
    # DataFrame subclasses, which is the idiomatic type check.
    assert isinstance(df_dump, pd.DataFrame)
    assert df_dump.shape[0] > 99
def test_dump_from_query_1(brexit):
    """Check that a query containing an ``@1`` marker yields a dump.

    The corpus is created from the ``corpus_name`` entry of the ``brexit``
    fixture and queried for ``[lemma="angela"] @1[lemma="merkel"]`` with
    ``anchors=[1]`` and the ``'longest'`` match strategy.  The result must be
    a pandas DataFrame with more than 99 rows.
    """
    corpus = Corpus(brexit['corpus_name'])
    df_dump = corpus.dump_from_query(
        query='[lemma="angela"] @1[lemma="merkel"]',
        anchors=[1],
        match_strategy='longest',
    )
    # isinstance (rather than an exact type() comparison) also accepts
    # DataFrame subclasses, which is the idiomatic type check.
    assert isinstance(df_dump, pd.DataFrame)
    assert df_dump.shape[0] > 99
def test_keywords_from_dump(germaparl):
    """Check that keywords computed from a query dump include ``'CDU'``.

    A corpus is created from the ``corpus_name`` and ``registry_path`` entries
    of the ``germaparl`` fixture.  The dump of ``"und" expand to s`` is passed
    to ``Keywords`` with ``p_query="lemma"``; the table shown with
    ``order='log_likelihood'`` and ``min_freq=10`` must contain ``'CDU'`` in
    its index.
    """
    name = 'test_keywords'

    # get some regions
    corpus = Corpus(corpus_name=germaparl['corpus_name'],
                    registry_path=germaparl['registry_path'])
    df_1 = corpus.dump_from_query('"und" expand to s', name=name)

    # will show keywords for df_1
    keywords = Keywords(corpus, df_dump=df_1, p_query="lemma")
    line_1 = keywords.show(order='log_likelihood', min_freq=10)
    assert 'CDU' in line_1.index