def insert_words(c, words):
    """Store every word in ``words`` together with its definitions.

    For each word the existing entry id is looked up first; the entry is
    only inserted when the lookup returns -1. The ``database`` helpers
    report failure by returning -1, which is logged here and never raised.

    Args:
        c: Cursor object forwarded to every ``database`` helper.
        words: Iterable of entry objects providing ``traditional``,
            ``simplified``, ``pinyin``, ``jyutping``, ``freq`` and
            ``definitions`` (each definition providing ``definition`` and
            ``label``).
    """
    for word in words:
        # The same five fields identify an entry for both lookup and insert.
        fields = (
            word.traditional,
            word.simplified,
            word.pinyin,
            word.jyutping,
            word.freq,
        )

        word_id = database.get_entry_id(c, *fields)
        if word_id == -1:
            # Not in the database yet: try to create it.
            word_id = database.insert_entry(c, *fields)
            if word_id == -1:
                logging.error(
                    f"Could not insert word {word.traditional}, uh oh!")
                # Without an entry id there is nothing to attach definitions to.
                continue

        for sense in word.definitions:
            # The trailing ``1`` and ``None`` are passed through unchanged;
            # their meaning is defined by database.insert_definition.
            sense_id = database.insert_definition(
                c, sense.definition, sense.label, word_id, 1, None)
            if sense_id == -1:
                logging.error(
                    f"Could not insert definition {sense} for word {word.traditional} "
                    "- check if the definition is a duplicate!")
def insert_words(c, words):
    """Store words, their definitions, and each definition's examples.

    Entries are looked up before being inserted, so an existing entry is
    reused. A -1 returned by a ``database`` helper is treated as failure:
    it is logged and the affected word or definition is skipped.

    Args:
        c: Cursor object forwarded to the ``database`` helpers and to
            ``insert_example``.
        words: Iterable of entry objects providing ``traditional``,
            ``simplified``, ``pinyin``, ``jyutping``, ``freq`` and
            ``definitions`` (each definition providing ``definition``,
            ``label`` and ``examples``).
    """
    # Reserved sentence IDs:
    #   - 0-999999999: Tatoeba
    #   - 1000000000-1999999999: words.hk
    #   - 2000000000-2999999999: CantoDict
    #   - 3000000000-3999999999: MoEDict
    #   - 4000000000-4999999999: Cross-Straits Dictionary
    next_example_id = 4000000000

    for word in words:
        # The same five fields identify an entry for both lookup and insert.
        fields = (
            word.traditional,
            word.simplified,
            word.pinyin,
            word.jyutping,
            word.freq,
        )

        word_id = database.get_entry_id(c, *fields)
        if word_id == -1:
            word_id = database.insert_entry(c, *fields)
            if word_id == -1:
                logging.error(
                    f"Could not insert word {word.traditional}, uh oh!")
                continue

        for sense in word.definitions:
            sense_id = database.insert_definition(
                c, sense.definition, sense.label, word_id, 1, None)
            if sense_id == -1:
                logging.error(
                    f"Could not insert definition {sense} for word {word.traditional} "
                    "- check if the definition is a duplicate!")
                # Examples are only stored for definitions that were inserted.
                continue

            for sample in sense.examples:
                # insert_example's return value advances the id counter
                # (presumably the number of sentence rows it consumed --
                # confirm against insert_example).
                next_example_id += insert_example(
                    c, sense_id, next_example_id, sample)
def insert_words(c, words):
    """Store grouped words with their definitions and example sentences.

    ``words`` is indexed by key, and each key maps to a collection of
    entries. Entries and definitions that cannot be inserted are looked up
    instead; only when both insert and lookup yield -1 is a warning logged
    and the item skipped.

    Args:
        c: Cursor object forwarded to the ``database`` helpers and to
            ``insert_example``.
        words: Mapping-like object; iterating it yields keys and
            ``words[key]`` yields an iterable of entry objects providing
            ``traditional``, ``simplified``, ``pinyin``, ``jyutping``,
            ``freq`` and ``definitions``.
    """
    # Because the sentence id is presumed to be unique by Tatoeba, we will
    # give it a namespace of 999999999 potential sentences. Thus, words.hk
    # sentences will start at rowid 1000000000.
    next_example_id = 1000000000

    for group in words:
        for word in words[group]:
            traditional = word.traditional
            simplified = word.simplified
            jyutping = word.jyutping
            pinyin = word.pinyin
            frequency = word.freq

            word_id = database.get_entry_id(
                c, traditional, simplified, pinyin, jyutping, frequency)
            if word_id == -1:
                word_id = database.insert_entry(
                    c, traditional, simplified, pinyin, jyutping, frequency,
                    None)
                if word_id == -1:
                    logging.warning(
                        f"Could not insert word {traditional}, uh oh!")
                    continue

            # Insert each meaning for the entry
            for sense in word.definitions:
                sense_id = database.insert_definition(
                    c, sense.definition, sense.label, word_id, 1, None)
                if sense_id == -1:
                    # Try to find definition if we got an error
                    sense_id = database.get_definition_id(
                        c, sense.definition, sense.label, word_id, 1)
                    if sense_id == -1:
                        logging.warning(
                            f"Could not insert definition {sense} for word {traditional}, uh oh!"
                        )
                        continue

                # Insert examples for each meaning
                for sample in sense.examples:
                    # insert_example's return value advances the id counter
                    # (presumably the number of sentence rows it consumed --
                    # confirm against insert_example).
                    next_example_id += insert_example(
                        c, sense_id, next_example_id, sample)
def write(db_name, source, entries):
    """Rebuild the SQLite database at ``db_name`` from ``entries``.

    The existing tables are dropped and recreated, the source record is
    inserted, then every entry and its definitions are written. Indices
    are generated and the transaction is committed at the end. The
    connection is closed even if one of the steps raises.

    Args:
        db_name: Path (or other target) passed to ``sqlite3.connect``.
        source: Object describing the data source; its ``name``,
            ``shortname``, ``version``, ``description``, ``legal``,
            ``link``, ``update_url`` and ``other`` attributes are stored.
        entries: Mapping-like object; iterating it yields keys and
            ``entries[key]`` yields entry objects providing
            ``traditional``, ``simplified``, ``pinyin``, ``jyutping``,
            ``freq`` and ``definitions`` (an iterable of
            ``(label, definition)`` pairs).
    """
    db = sqlite3.connect(db_name)
    try:
        c = db.cursor()

        # Set version of database
        database.write_database_version(c)

        # Delete old tables and indices, then create new one
        database.drop_tables(c)
        database.create_tables(c)

        # Add sources to table
        database.insert_source(
            c,
            source.name,
            source.shortname,
            source.version,
            source.description,
            source.legal,
            source.link,
            source.update_url,
            source.other,
            None,
        )

        for key in entries:
            for entry in entries[key]:
                # The same five fields identify an entry for lookup and insert.
                fields = (
                    entry.traditional,
                    entry.simplified,
                    entry.pinyin,
                    entry.jyutping,
                    entry.freq,
                )

                entry_id = database.get_entry_id(c, *fields)
                if entry_id == -1:
                    entry_id = database.insert_entry(c, *fields)
                    if entry_id == -1:
                        logging.error(
                            f"Could not insert word {entry.traditional}, uh oh!")
                        # No entry id, so its definitions cannot be attached.
                        continue

                for label, definition in entry.definitions:
                    definition_id = database.insert_definition(
                        c, definition, label, entry_id, 1, None)
                    if definition_id == -1:
                        logging.error(
                            f"Could not insert definition {definition} for word {entry.traditional} "
                            "- check if the definition is a duplicate!")

        database.generate_indices(c)
        db.commit()
    finally:
        # Release the connection on both the success and the failure path.
        db.close()
def write(db_name, source, entries, sentences, translations):
    """Rebuild the SQLite database at ``db_name`` with entries and sentences.

    The existing tables are dropped and recreated, the source record is
    inserted, then entries with their definitions, Chinese sentences with
    their translation links, and finally the translations are written.
    Indices are generated and the transaction is committed at the end.
    The connection is closed even if one of the steps raises.

    Args:
        db_name: Path (or other target) passed to ``sqlite3.connect``.
        source: Object describing the data source; its ``name``,
            ``shortname``, ``version``, ``description``, ``legal``,
            ``link``, ``update_url`` and ``other`` attributes are stored.
        entries: Iterable of entry objects providing ``traditional``,
            ``simplified``, ``pinyin``, ``jyutping``, ``freq`` and
            ``definitions`` (an iterable of ``(label, definition)`` pairs).
        sentences: Iterable of sentence objects providing ``traditional``,
            ``simplified``, ``pinyin``, ``jyutping``, ``language`` and
            ``id``.
        translations: Iterable of translation objects providing
            ``sentence``, ``language`` and ``id``.
    """
    # In CantoDict, a sentence ID and its corresponding translation ID are
    # separated by 500000000 (See parse_sentence_file())
    translation_id_offset = 500000000

    db = sqlite3.connect(db_name)
    try:
        c = db.cursor()

        # Set version of database
        database.write_database_version(c)

        # Delete old tables and indices, then create new one
        database.drop_tables(c)
        database.create_tables(c)

        # Add sources to table
        database.insert_source(
            c,
            source.name,
            source.shortname,
            source.version,
            source.description,
            source.legal,
            source.link,
            source.update_url,
            source.other,
            None,
        )

        for entry in entries:
            # The same five fields identify an entry for lookup and insert.
            fields = (
                entry.traditional,
                entry.simplified,
                entry.pinyin,
                entry.jyutping,
                entry.freq,
            )

            entry_id = database.get_entry_id(c, *fields)
            if entry_id == -1:
                entry_id = database.insert_entry(c, *fields)
                if entry_id == -1:
                    logging.error(
                        f"Could not insert word {entry.traditional}, uh oh!")
                    # No entry id, so its definitions cannot be attached.
                    continue

            for label, definition in entry.definitions:
                definition_id = database.insert_definition(
                    c, definition, label, entry_id, 1, None
                )
                if definition_id == -1:
                    logging.error(
                        f"Could not insert definition {definition} for word {entry.traditional}, uh oh!"
                    )

        for sentence in sentences:
            database.insert_chinese_sentence(
                c,
                sentence.traditional,
                sentence.simplified,
                sentence.pinyin,
                sentence.jyutping,
                sentence.language,
                sentence.id,
            )

            # Link the sentence to its translation; the trailing ``1`` and
            # ``True`` are forwarded as-is to database.insert_sentence_link.
            database.insert_sentence_link(
                c,
                sentence.id,
                sentence.id + translation_id_offset,
                1,
                True,
            )

        for translation in translations:
            database.insert_nonchinese_sentence(
                c,
                translation.sentence,
                translation.language,
                translation.id,
            )

        database.generate_indices(c)
        db.commit()
    finally:
        # Release the connection on both the success and the failure path.
        db.close()
def write(entries, db_name):
    """Rebuild the SQLite database at ``db_name`` from ``entries``.

    The existing tables are dropped and recreated, the source record is
    inserted, then every entry and its definitions are written. Indices
    are generated and the transaction is committed at the end. The
    connection is closed even if one of the steps raises.

    NOTE(review): ``source`` is not a parameter; it is resolved from the
    enclosing (module) scope at call time -- confirm it is defined before
    this function runs.

    Args:
        entries: Mapping-like object; iterating it yields keys and
            ``entries[key]`` yields entry objects providing
            ``traditional``, ``simplified``, ``pinyin``, ``jyutping``,
            ``fuzzy_jyutping``, ``freq`` and ``definitions``.
        db_name: Path (or other target) passed to ``sqlite3.connect``.
    """
    db = sqlite3.connect(db_name)
    try:
        c = db.cursor()

        database.write_database_version(c)

        database.drop_tables(c)
        database.create_tables(c)

        database.insert_source(
            c,
            source.name,
            source.shortname,
            source.version,
            source.description,
            source.legal,
            source.link,
            source.update_url,
            source.other,
            None,
        )

        for key in entries:
            for entry in entries[key]:
                # Fall back to the fuzzy romanisation only when the regular
                # jyutping is exactly the empty string.
                jyutping = (
                    entry.jyutping
                    if entry.jyutping != ""
                    else entry.fuzzy_jyutping
                )
                # The same five fields identify an entry for lookup and insert.
                fields = (
                    entry.traditional,
                    entry.simplified,
                    entry.pinyin,
                    jyutping,
                    entry.freq,
                )

                entry_id = database.get_entry_id(c, *fields)
                if entry_id == -1:
                    entry_id = database.insert_entry(c, *fields)
                    if entry_id == -1:
                        logging.error(
                            f"Could not insert word {entry.traditional}, uh oh!")
                        # No entry id, so its definitions cannot be attached.
                        continue

                for definition in entry.definitions:
                    # Definitions are stored with an empty label here.
                    definition_id = database.insert_definition(
                        c, definition, "", entry_id, 1, None)
                    if definition_id == -1:
                        logging.error(
                            f"Could not insert definition {definition} for word {entry.traditional}, uh oh!"
                        )

        database.generate_indices(c)
        db.commit()
    finally:
        # Release the connection on both the success and the failure path.
        db.close()