Example #1
File: ngram.py  Project: pombredanne/qlc
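
For each language in the huber1992 source, this example counts all bigrams over the graphemes of its counterparts, then assembles the per-language counts into a single languages-by-ngrams matrix.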
import collections
import os
import sys

import numpy

import qlc.matrix
import qlc.ngram
# The module paths for these two classes are assumed from the qlc package layout:
from qlc.corpusreader import CorpusReaderWordlist
from qlc.orthography import OrthographyParser

def main(argv):

    if len(argv) < 2:
        print("call: ngram.py data_path")
        sys.exit(1)

    cr = CorpusReaderWordlist(os.path.join(argv[1], "csv"))
    o = OrthographyParser(os.path.join(argv[1], "orthography_profiles", "huber1992.txt"))
    
    ngrams_by_language_count = []
    ngrams_set = set()
    
    for i, wordlistdata_id in enumerate(cr.wordlistdata_ids_for_bibtex_key('huber1992')):
        language_bookname = cr.get_language_bookname_for_wordlistdata_id(wordlistdata_id)
        language_code = cr.get_language_code_for_wordlistdata_id(wordlistdata_id)

        # Parse each counterpart string into its graphemes (lazily, as a generator).
        counterpart_graphemes = (
            o.parse_string_to_graphemes(counterpart)
            for counterpart in cr.counterparts_for_wordlistdata_id(wordlistdata_id))

        # Build a words-by-bigrams count matrix for this language.
        matrix = qlc.ngram.words_ngrams_matrix_for_graphemes_list(counterpart_graphemes, 2)

        # Column sums: total count of each bigram across all counterparts.
        # (Renamed from "sum" to avoid shadowing the builtin.)
        column_sums = numpy.sum(matrix.matrix, 0)

        # Every bigram column should have at least one non-zero entry.
        if len(column_sums.nonzero()[0]) != matrix.number_of_columns:
            print("Error: ")
            print("{0} != {1}".format(len(column_sums.nonzero()[0]), matrix.number_of_columns))
            print(language_bookname)

        ngrams_by_language_count.append(collections.defaultdict(int))
        for j, c in enumerate(matrix.column_names):
            ngrams_set.add(c)
            ngrams_by_language_count[i][c] = column_sums[j]

    # Assemble the final languages-by-ngrams count matrix.
    ngrams_list = sorted(ngrams_set)
    matrix = qlc.matrix.Matrix(ngrams_by_language_count, ngrams_list)
    
    for i in range(matrix.number_of_rows):
        for j, ngram in enumerate(ngrams_list):
            matrix.matrix[i][j] = ngrams_by_language_count[i][ngram]
            
    print(matrix.matrix)
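
A minimal sketch of how this example would be run from the command line (the __main__ guard is an assumption, not part of the original file; the data_path argument is expected to contain csv/ and orthography_profiles/ subdirectories, as the code above shows):

if __name__ == "__main__":
    main(sys.argv)  # e.g. python ngram.py data_path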
Example #2

import codecs
import collections
import os
import sys

# Module path assumed from the qlc package layout:
from qlc.corpusreader import CorpusReaderWordlist

def main(argv):

    if len(argv) < 2:
        print("call: concepts_with_counterparts.py data_path [(bibtex_key|component)]")
        sys.exit(1)

    cr = CorpusReaderWordlist(argv[1])
    print("Data loaded", file=sys.stderr)
    
    # Select wordlist data: by bibtex_key or component if given, otherwise all of it.
    if len(argv) == 3:
        wordlistdata_ids = cr.wordlistdata_ids_for_bibtex_key(argv[2])
        if len(wordlistdata_ids) == 0:
            wordlistdata_ids = cr.wordlistdata_ids_for_component(argv[2])
            if len(wordlistdata_ids) == 0:
                print("did not find any wordlist data for the bibtex_key or component {0}.".format(argv[2]), file=sys.stderr)
                sys.exit(1)
    else:
        wordlistdata_ids = cr.wordlistdata_string_ids
        
    # Group the wordlist data IDs by bibtex key (the prefix of the string ID).
    bibtex_keys = collections.defaultdict(list)
    for wid in wordlistdata_ids:
        wordlistdata_string = cr.wordlistdata_string_ids[wid]
        bibtex_key = wordlistdata_string.split("_")[0]
        bibtex_keys[bibtex_key].append(wid)
        
    for bibtex_key in bibtex_keys:

        print("Writing data for wordlistdata bibtex key {0}".format(bibtex_key), file=sys.stderr)

        filename = "concepts_with_counterparts_%s.txt" % bibtex_key
        output = codecs.open(filename, "w", "utf-8")
        output.write("COUNTERPART\tCONCEPT\tLANGUAGE_BOOKNAME\tLANGUAGE_CODE\tBIBTEX_KEY\n")

        rows_written = 0
        for wordlistdata_id in bibtex_keys[bibtex_key]:
            language_bookname = cr.get_language_bookname_for_wordlistdata_id(wordlistdata_id)
            language_code = cr.get_language_code_for_wordlistdata_id(wordlistdata_id)

            for concept, counterpart in cr.data(wordlistdata_id):
                output.write("{0}\t{1}\t{2}\t{3}\t{4}\n".format(
                    counterpart, concept, language_bookname, language_code, bibtex_key))
                rows_written += 1

        output.close()

        # The header line is always written, so the file is never empty on disk;
        # remove files that contain no data rows beyond the header.
        if rows_written == 0:
            os.remove(filename)
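
As with the first example, a minimal invocation sketch (the __main__ guard is an assumption; the two argument forms follow the usage message in the code above):

if __name__ == "__main__":
    # e.g. python concepts_with_counterparts.py data_path
    # or   python concepts_with_counterparts.py data_path huber1992
    main(sys.argv)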