# -*- coding: utf-8 -*-
"""
Kitconc examples: clusters.

Opens the 'ads' corpus, calls ``Corpus.clusters`` for the word
'experience' with ``size=3``, prints ``df.head(10)`` of the result and
saves the result as an Excel file under ``corpus.output_path``.

@author: [email protected]
"""
from kitconc.kit_corpus import Corpus

# Arguments handed to the Corpus constructor, in positional order.
WORKSPACE = 'kitconc-examples/workspace'
CORPUS_NAME = 'ads'
LANGUAGE = 'english'

# Word passed to corpus.clusters().
NODE = 'experience'

# reference to the corpus
corpus = Corpus(WORKSPACE, CORPUS_NAME, LANGUAGE)

# build the clusters
clusters = corpus.clusters(NODE, size=3, show_progress=True)

# show the head of the result table
preview = clusters.df.head(10)
print(preview)

# save Excel file (output_path is used as a plain string prefix)
excel_path = corpus.output_path + 'clusters.xlsx'
clusters.save_excel(excel_path)
# -*- coding: utf-8 -*-
"""
Kitconc examples: keywords dispersion.

Opens the 'ads' corpus and chains three calls: ``Corpus.wordlist``, then
``Corpus.keywords`` on that wordlist, then ``Corpus.keywords_dispersion``
on those keywords. Prints ``df.head(10)`` of the final result and saves
it as an Excel file under ``corpus.output_path``.

@author: [email protected]
"""
from kitconc.kit_corpus import Corpus

# Arguments handed to the Corpus constructor, in positional order.
WORKSPACE = 'kitconc-examples/workspace'
CORPUS_NAME = 'ads'
LANGUAGE = 'english'

# reference to the corpus
corpus = Corpus(WORKSPACE, CORPUS_NAME, LANGUAGE)

# each step consumes the result of the previous one
wordlist = corpus.wordlist(show_progress=True)
keywords = corpus.keywords(wordlist, show_progress=True)
keywords_dispersion = corpus.keywords_dispersion(
    keywords,
    show_progress=True,
)

# show the head of the result table
preview = keywords_dispersion.df.head(10)
print(preview)

# save Excel file (output_path is used as a plain string prefix)
excel_path = corpus.output_path + 'keywords_dispersion.xlsx'
keywords_dispersion.save_excel(excel_path)
# -*- coding: utf-8 -*-
"""
Kitconc examples: collocates.

Opens the 'ads' corpus and calls ``Corpus.collocates`` for the word
'experience' with ``left_span=2``, ``right_span=2`` and
``coll_pos='IN NN JJ VBN VBD'``. Prints ``df.head(10)`` of the result and
saves it as an Excel file under ``corpus.output_path``.

@author: [email protected]
"""
from kitconc.kit_corpus import Corpus

# Arguments handed to the Corpus constructor, in positional order.
WORKSPACE = 'kitconc-examples/workspace'
CORPUS_NAME = 'ads'
LANGUAGE = 'english'

# Word passed to corpus.collocates().
NODE = 'experience'

# Joined with single spaces into the coll_pos argument.
COLLOCATE_TAGS = ('IN', 'NN', 'JJ', 'VBN', 'VBD')

# reference to the corpus
corpus = Corpus(WORKSPACE, CORPUS_NAME, LANGUAGE)

# get the collocates
collocates = corpus.collocates(
    NODE,
    left_span=2,
    right_span=2,
    coll_pos=' '.join(COLLOCATE_TAGS),
    show_progress=True,
)

# show the head of the result table
preview = collocates.df.head(10)
print(preview)

# save Excel file (output_path is used as a plain string prefix)
excel_path = corpus.output_path + 'collocates.xlsx'
collocates.save_excel(excel_path)
# -*- coding: utf-8 -*-
"""
Kitconc examples: concordance.

Opens the 'ads' corpus, calls ``Corpus.concordance`` for the word
'experience', prints ``df.head(10)`` of the result and saves it as an
Excel file under ``corpus.output_path`` with ``highlight='R1 R2 R3'``.

@author: [email protected]
"""
from kitconc.kit_corpus import Corpus

# Arguments handed to the Corpus constructor, in positional order.
WORKSPACE = 'kitconc-examples/workspace'
CORPUS_NAME = 'ads'
LANGUAGE = 'english'

# Word passed to corpus.concordance().
NODE = 'experience'

# Value passed as save_excel(highlight=...).
HIGHLIGHT = 'R1 R2 R3'

# reference to the corpus
corpus = Corpus(WORKSPACE, CORPUS_NAME, LANGUAGE)

# get the concordance lines
concordances = corpus.concordance(NODE, show_progress=True)

# show the head of the result table
preview = concordances.df.head(10)
print(preview)

# save Excel file (output_path is used as a plain string prefix)
excel_path = corpus.output_path + 'concordances.xlsx'
concordances.save_excel(excel_path, highlight=HIGHLIGHT)
# -*- coding: utf-8 -*-
# Author: [email protected]
#
# Example of a custom script. It echoes the command-line arguments it
# receives (sys.argv[1] is reported as the workspace, sys.argv[2] as the
# corpus, anything after that as extra arguments), then opens a Corpus with
# the first two arguments and prints the value returned by corpus.tokens().
import sys

from kitconc.kit_corpus import Corpus

# Both sys.argv[1] and sys.argv[2] are required below; stop early with a
# usage message rather than failing half-way with an IndexError traceback.
if len(sys.argv) < 3:
    sys.exit('Usage: <script> <workspace> <corpus> [extra arguments ...]')

workspace, corpus_name = sys.argv[1], sys.argv[2]

print('Hello!')
print('')
print("""This is an example on how to create a custom scripts. \nHere is how arguments are received: """)
print('Arg 1 - Workspace: %s' % workspace)
print('Arg 2 - Corpus: %s' % corpus_name)

# Any further arguments are listed with their position in sys.argv
# (the slice is empty when only the two required arguments were given).
for position, extra in enumerate(sys.argv[3:], start=3):
    print('Arg %s: %s' % (position, extra))

print('\nHere is how to use the Corpus object:')
corpus = Corpus(workspace, corpus_name)
print('\nTokens: %s' % corpus.tokens())
# -*- coding: utf-8 -*-
"""
Kitconc examples: KWIC.

Opens the 'ads' corpus, calls ``Corpus.kwic`` for the word 'experience',
sorts the result with ``sort('R1', 'R2', 'R3')``, prints ``df.head(10)``
and saves it as an Excel file under ``corpus.output_path`` with
``highlight='R1 R2 R3'``.

@author: [email protected]
"""
from kitconc.kit_corpus import Corpus

# Arguments handed to the Corpus constructor, in positional order.
WORKSPACE = 'kitconc-examples/workspace'
CORPUS_NAME = 'ads'
LANGUAGE = 'english'

# Word passed to corpus.kwic().
NODE = 'experience'

# Used twice: unpacked as the arguments of kwic.sort() and joined with
# single spaces for save_excel(highlight=...).
SORT_KEYS = ('R1', 'R2', 'R3')

# reference to the corpus
corpus = Corpus(WORKSPACE, CORPUS_NAME, LANGUAGE)

# get the KWIC lines and sort them
kwic = corpus.kwic(NODE, show_progress=True)
kwic.sort(*SORT_KEYS)

# show the head of the result table
preview = kwic.df.head(10)
print(preview)

# save Excel file (output_path is used as a plain string prefix)
excel_path = corpus.output_path + 'kwic.xlsx'
kwic.save_excel(excel_path, highlight=' '.join(SORT_KEYS))
# -*- coding: utf-8 -*-
"""
Kitconc examples: dispersion.

Opens the 'ads' corpus, calls ``Corpus.dispersion`` for the word
'salary', prints ``df.head(10)`` of the result and saves it as an Excel
file under ``corpus.output_path``.

@author: [email protected]
"""
from kitconc.kit_corpus import Corpus

# Arguments handed to the Corpus constructor, in positional order.
WORKSPACE = 'kitconc-examples/workspace'
CORPUS_NAME = 'ads'
LANGUAGE = 'english'

# Word passed to corpus.dispersion().
NODE = 'salary'

# reference to the corpus
corpus = Corpus(WORKSPACE, CORPUS_NAME, LANGUAGE)

# get the dispersion of the word
dispersion = corpus.dispersion(NODE)

# show the head of the result table
preview = dispersion.df.head(10)
print(preview)

# save Excel file (output_path is used as a plain string prefix)
excel_path = corpus.output_path + 'dispersion.xlsx'
dispersion.save_excel(excel_path)
# -*- coding: utf-8 -*-
"""
Kitconc examples: collocates with a graph plot.

Opens the 'ads' corpus and calls ``Corpus.collocates`` for the word
'skills' with ``left_span=3``, ``right_span=3`` and ``coll_pos='NN JJ'``.
Prints ``df.head(10)`` of the result, saves it as an Excel file under
``corpus.output_path`` and finally calls ``plot_collgraph(node='skills')``.

@author: [email protected]
"""
from kitconc.kit_corpus import Corpus

# Arguments handed to the Corpus constructor, in positional order.
WORKSPACE = 'kitconc-examples/workspace'
CORPUS_NAME = 'ads'
LANGUAGE = 'english'

# Word passed to corpus.collocates() and again to plot_collgraph(node=...).
NODE = 'skills'

# Joined with single spaces into the coll_pos argument.
COLLOCATE_TAGS = ('NN', 'JJ')

# reference to the corpus
corpus = Corpus(WORKSPACE, CORPUS_NAME, LANGUAGE)

# get the collocates
collocates = corpus.collocates(
    NODE,
    left_span=3,
    right_span=3,
    coll_pos=' '.join(COLLOCATE_TAGS),
    show_progress=True,
)

# show the head of the result table
preview = collocates.df.head(10)
print(preview)

# save Excel file (output_path is used as a plain string prefix)
excel_path = corpus.output_path + 'collocates.xlsx'
collocates.save_excel(excel_path)

# plot collocates
collocates.plot_collgraph(node=NODE)
# -*- coding: utf-8 -*-
"""
Kitconc examples: n-grams.

Opens the 'ads' corpus, calls ``Corpus.ngrams`` with ``size=3`` and
``pos='NN IN NN'``, prints ``df.head(10)`` of the result and saves it as
an Excel file under ``corpus.output_path``.

@author: [email protected]
"""
from kitconc.kit_corpus import Corpus

# Arguments handed to the Corpus constructor, in positional order.
WORKSPACE = 'kitconc-examples/workspace'
CORPUS_NAME = 'ads'
LANGUAGE = 'english'

# Joined with single spaces into the pos argument; its length equals the
# size argument used below.
TAG_SEQUENCE = ('NN', 'IN', 'NN')

# reference to the corpus
corpus = Corpus(WORKSPACE, CORPUS_NAME, LANGUAGE)

# get the n-grams
ngrams = corpus.ngrams(
    size=3,
    pos=' '.join(TAG_SEQUENCE),
    show_progress=True,
)

# show the head of the result table
preview = ngrams.df.head(10)
print(preview)

# save Excel file (output_path is used as a plain string prefix)
excel_path = corpus.output_path + 'ngrams.xlsx'
ngrams.save_excel(excel_path)
# -*- coding: utf-8 -*-
"""
Kitconc examples: collocations.

Opens the 'ads' corpus, calls ``Corpus.kwic`` for the word 'skills' and
feeds that result to ``Corpus.collocations``. Prints ``df.head(10)`` of
the collocations, saves them as an Excel file under
``corpus.output_path`` and finally calls ``plot_colldist('strong')``.

@author: [email protected]
"""
from kitconc.kit_corpus import Corpus

# Arguments handed to the Corpus constructor, in positional order.
WORKSPACE = 'kitconc-examples/workspace'
CORPUS_NAME = 'ads'
LANGUAGE = 'english'

# Word passed to corpus.kwic().
NODE = 'skills'

# Word passed to collocations.plot_colldist().
COLLOCATE = 'strong'

# reference to the corpus
corpus = Corpus(WORKSPACE, CORPUS_NAME, LANGUAGE)

# the collocations are computed from the KWIC result
kwic = corpus.kwic(NODE, show_progress=True)
collocations = corpus.collocations(kwic, show_progress=True)

# show the head of the result table
preview = collocations.df.head(10)
print(preview)

# save Excel file (output_path is used as a plain string prefix)
excel_path = corpus.output_path + 'collocations.xlsx'
collocations.save_excel(excel_path)

# plot a collocate distribution
collocations.plot_colldist(COLLOCATE)
# -*- coding: utf-8 -*-
"""
Kitconc examples: wordlist.

Opens the 'ads' corpus, calls ``Corpus.wordlist``, prints ``df.head(10)``
of the result and saves it as an Excel file under ``corpus.output_path``.

@author: [email protected]
"""
from kitconc.kit_corpus import Corpus

# Arguments handed to the Corpus constructor, in positional order.
WORKSPACE = 'kitconc-examples/workspace'
CORPUS_NAME = 'ads'
LANGUAGE = 'english'

# reference to the corpus
corpus = Corpus(WORKSPACE, CORPUS_NAME, LANGUAGE)

# make wordlist
wordlist = corpus.wordlist(show_progress=True)

# print the top 10
top_rows = wordlist.df.head(10)
print(top_rows)

# save Excel file (output_path is used as a plain string prefix)
excel_path = corpus.output_path + 'wordlist.xlsx'
wordlist.save_excel(excel_path)
# -*- coding: utf-8 -*-
"""
Kitconc examples: adding texts to a corpus.

Opens the 'ads' corpus and calls ``Corpus.add_texts`` with the source
folder 'kitconc-examples/ads'.

@author: [email protected]
"""
from kitconc.kit_corpus import Corpus

# Arguments handed to the Corpus constructor, in positional order.
WORKSPACE = 'kitconc-examples/workspace'
CORPUS_NAME = 'ads'
LANGUAGE = 'english'

# Folder passed to corpus.add_texts().
SOURCE_FOLDER = 'kitconc-examples/ads'

# reference to the corpus
corpus = Corpus(WORKSPACE, CORPUS_NAME, LANGUAGE)

# add texts from source folder
corpus.add_texts(SOURCE_FOLDER, show_progress=True)