""" Tool joins transcription to comma-separated input words list. Transcription is read from XML datafile grabbed from Oxford Word Practice CD. Input data expect to have CSV format without header. We expect word to be transcribed in first column, transcription in UTF-16 format is joined as last column. """ import sys import csv from lib.trans_db import TranscriptDB if __name__ == "__main__": transcript_path = "data/extrawordlist-intermediate.xml" db = TranscriptDB() db.read(transcript_path, filter_unit=1) reader = csv.reader(sys.stdin) writer = csv.writer(sys.stdout) for row in reader: word = row[0] row.append("/" + word + "/") writer.writerow(row)
""" Tool dumps given unit words from xml transcription DB """ import csv import sys import argparse from lib.trans_db import TranscriptDB if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("db", type=str, help="Database file name") parser.add_argument("unit", type=int, help="Unit to dump") args = parser.parse_args() db = TranscriptDB() count = db.read(args.db, filter_unit=args.unit) writer = csv.writer(sys.stdout) for entry in db.sorted_entries(): row = [entry.word, "", entry.ipa_utf8] writer.writerow(row)