def main(argv):
    """Write concept/counterpart pairs to one tab-separated file per bibtex key.

    Loads a ``CorpusReaderWordlist`` from ``argv[1]``, selects wordlist data
    ids, groups them by the part of their string id before the first ``"_"``
    (used here as the bibtex key), and writes one file named
    ``concepts_with_counterparts_<bibtex_key>.txt`` per group into the
    current working directory.

    Args:
        argv: Command-line style argument list. ``argv[1]`` is the data path
            handed to ``CorpusReaderWordlist``. If exactly three items are
            given, ``argv[2]`` is looked up first as a bibtex key and, if
            that yields nothing, as a component. With any other length
            (>= 2) every id in ``cr.wordlistdata_string_ids`` is exported.

    Raises:
        SystemExit: With code 1 when the data path is missing, or when
            ``argv[2]`` matches neither a bibtex key nor a component.
    """
    if len(argv) < 2:
        print("call: concepts_with_counterparts.py data_path [(bibtex_key|component)]")
        # sys.exit instead of the site-provided exit() builtin: the latter is
        # not available when Python runs without the site module.
        sys.exit(1)

    cr = CorpusReaderWordlist(argv[1])
    print("Data loaded", file=sys.stderr)

    if len(argv) == 3:
        wordlistdata_ids = cr.wordlistdata_ids_for_bibtex_key(argv[2])
        if len(wordlistdata_ids) == 0:
            # Nothing found under that bibtex key; retry as a component name.
            wordlistdata_ids = cr.wordlistdata_ids_for_component(argv[2])
            if len(wordlistdata_ids) == 0:
                print("did not find any dictionary data for the bibtex_key or component {0}.".format(argv[2]), file=sys.stderr)
                sys.exit(1)
    else:
        wordlistdata_ids = cr.wordlistdata_string_ids

    # Group the selected ids by the prefix of their string id.
    bibtex_keys = collections.defaultdict(list)
    for wid in wordlistdata_ids:
        wordlistdata_string = cr.wordlistdata_string_ids[wid]
        bibtex_key = wordlistdata_string.split("_")[0]
        bibtex_keys[bibtex_key].append(wid)

    for bibtex_key, grouped_ids in bibtex_keys.items():
        print("Writing data for wordlistdata bibtex key {0}".format(bibtex_key), file=sys.stderr)
        output_path = "concepts_with_counterparts_%s.txt" % bibtex_key

        # The context manager closes the file even if reading the corpus or
        # writing a row raises.
        with codecs.open(output_path, "w", "utf-8") as output:
            output.write("COUNTERPART\tCONCEPT\tLANGUAGE_BOOKNAME\tLANGUAGE_CODE\tBIBTEX_KEY\n")
            for wordlistdata_id in grouped_ids:
                language_bookname = cr.get_language_bookname_for_wordlistdata_id(wordlistdata_id)
                language_code = cr.get_language_code_for_wordlistdata_id(wordlistdata_id)
                for concept, counterpart in cr.data(wordlistdata_id):
                    output.write("{0}\t{1}\t{2}\t{3}\t{4}\n".format(
                        counterpart, concept, language_bookname, language_code, bibtex_key))

        # NOTE(review): the header line is always written above, so the file
        # size is not expected to be 0 here; kept as-is to preserve behavior.
        # Confirm whether header-only files were meant to be removed.
        if os.path.getsize(output_path) == 0:
            os.remove(output_path)