def write(dictionary, args, output_file_path):
    """
    Write the given dictionary in Bookeen format.

    Working inside a temp directory, this writes the entry definitions into
    c_* chunk files, records (headword, offset, size, chunk) rows in the
    SQLite ``.dict.idx`` index (a copy of the bundled empty index), zips the
    chunks into the ``.dict`` file, and finally either bundles both into a
    single ``.install`` zip or copies the ``.dict``/``.dict.idx`` pair next
    to the requested output path.

    :param dictionary: dictionary object exposing ``entries`` and
        ``entries_index_sorted``
    :param args: parsed CLI arguments controlling the output
    :param output_file_path: requested output file path
    :return: list of paths of the created file(s)
    """
    # result to be returned
    result = None
    # get absolute path
    output_file_path_absolute = os.path.abspath(output_file_path)
    # get absolute path for collation function file
    bookeen_collation_function_path = None
    if args.bookeen_collation_function is not None:
        bookeen_collation_function_path = os.path.abspath(args.bookeen_collation_function)
    # create tmp directory and work inside it
    cwd = os.getcwd()
    tmp_path = create_temp_directory()
    print_debug("Working in temp dir '%s'" % (tmp_path), args.debug)
    os.chdir(tmp_path)
    # get the basename
    base = os.path.basename(output_file_path)
    if base.endswith(".zip"):
        base = base[:-4]
    # copy empty.idx into tmp_path
    idx_file_path = base + u".dict.idx"
    dict_file_path = base + u".dict"
    copy_file(EMPTY_FILE_PATH, idx_file_path)
    # open index
    sql_connection = sqlite3.connect(idx_file_path)
    # install collation in the index
    collation_function = collate_function_default
    if bookeen_collation_function_path is not None:
        try:
            collation_function = imp.load_source("", bookeen_collation_function_path).collate_function
            print_debug("Using collation function from '%s'" % (bookeen_collation_function_path), args.debug)
        except Exception:
            # deliberate best-effort fallback to the default collation;
            # narrowed from a bare except so SystemExit/KeyboardInterrupt
            # are not swallowed
            print_error("Unable to load collation function from '%s'. Using the default collation function instead." % (bookeen_collation_function_path))
    sql_connection.create_collation("IcuNoCase", collation_function)
    sql_connection.text_factory = str
    # get a cursor and delete any data from the index file
    sql_cursor = sql_connection.cursor()
    sql_cursor.execute("delete from T_DictIndex")
    # write c_* files
    # each c_* file has MAX_CHUNK_SIZE < size <= (MAX_CHUNK_SIZE * 2) bytes (tentatively)
    print_debug("Writing c_* files...", args.debug)
    files_to_compress = []
    current_offset = 0
    chunk_index = 1
    chunk_file_path = "%s%d" % (CHUNK_FILE_PREFIX, chunk_index)
    files_to_compress.append(chunk_file_path)
    chunk_file_obj = io.open(chunk_file_path, "wb")
    for entry_index in dictionary.entries_index_sorted:
        entry = dictionary.entries[entry_index]
        definition_bytes = entry.definition.encode("utf-8")
        definition_size = len(definition_bytes)
        chunk_file_obj.write(definition_bytes)
        # insert headword into index file
        sql_tuple = (0, entry.headword, current_offset, definition_size, chunk_index)
        sql_cursor.execute("insert into T_DictIndex values (?,?,?,?,?)", sql_tuple)
        # insert synonyms into index file, pointing at the same definition
        if not args.ignore_synonyms:
            for synonym in entry.get_synonyms():
                sql_tuple = (0, synonym[0], current_offset, definition_size, chunk_index)
                sql_cursor.execute("insert into T_DictIndex values (?,?,?,?,?)", sql_tuple)
        # update offset
        current_offset += definition_size
        # if we reached CHUNK_SIZE, open the next c_* file
        if current_offset > CHUNK_SIZE:
            chunk_file_obj.close()
            chunk_index += 1
            chunk_file_path = "%s%d" % (CHUNK_FILE_PREFIX, chunk_index)
            files_to_compress.append(chunk_file_path)
            chunk_file_obj = io.open(chunk_file_path, "wb")
            current_offset = 0
    chunk_file_obj.close()
    print_debug("Writing c_* files... done", args.debug)
    # compress the chunks into the .dict zip
    print_debug("Compressing c_* files...", args.debug)
    with zipfile.ZipFile(dict_file_path, "w", zipfile.ZIP_DEFLATED) as file_zip_obj:
        for file_to_compress in files_to_compress:
            file_zip_obj.write(os.path.basename(file_to_compress))
    print_debug("Compressing c_* files... done", args.debug)
    # update index metadata
    print_debug("Updating index metadata...", args.debug)
    header = HEADER % (args.language_from)
    sql_cursor.execute("update T_DictInfo set F_xhtmlHeader=?", (header,))
    sql_cursor.execute("update T_DictInfo set F_LangFrom=?", (args.language_from,))
    sql_cursor.execute("update T_DictInfo set F_LangTo=?", (args.language_to,))
    sql_cursor.execute("update T_DictInfo set F_Licence=?", (args.license,))
    sql_cursor.execute("update T_DictInfo set F_Copyright=?", (args.copyright,))
    sql_cursor.execute("update T_DictInfo set F_Title=?", (args.title,))
    sql_cursor.execute("update T_DictInfo set F_Description=?", (args.description,))
    sql_cursor.execute("update T_DictInfo set F_Year=?", (args.year,))
    # the meaning of the following is unknown
    sql_cursor.execute("update T_DictInfo set F_Alphabet=?", ("Z",))
    sql_cursor.execute("update T_DictInfo set F_CollationLevel=?", ("1",))
    sql_cursor.execute("update T_DictVersion set F_DictType=?", ("stardict",))
    sql_cursor.execute("update T_DictVersion set F_Version=?", ("11",))
    print_debug("Updating index metadata... done", args.debug)
    # commit before VACUUM: on Python 3.6+ VACUUM raises inside the implicit
    # open transaction, and closing without committing would roll back all
    # the inserts/updates above
    sql_connection.commit()
    # compact and close
    sql_cursor.execute("vacuum")
    sql_cursor.close()
    sql_connection.close()
    # create .install file or copy .dict.idx and .dict into requested output directory
    parent_output_directory = os.path.split(output_file_path_absolute)[0]
    if args.bookeen_install_file:
        print_debug("Creating .install file...", args.debug)
        file_zip_path = os.path.join(parent_output_directory, base + u".install")
        with zipfile.ZipFile(file_zip_path, "w", zipfile.ZIP_DEFLATED) as file_zip_obj:
            for file_to_compress in [dict_file_path, idx_file_path]:
                file_zip_obj.write(os.path.basename(file_to_compress))
        result = [file_zip_path]
        print_debug("Creating .install file... done", args.debug)
    else:
        print_debug("Copying .dict.idx and .dict files...", args.debug)
        dict_file_path_final = os.path.join(parent_output_directory, os.path.basename(dict_file_path))
        idx_file_path_final = os.path.join(parent_output_directory, os.path.basename(idx_file_path))
        copy_file(dict_file_path, dict_file_path_final)
        copy_file(idx_file_path, idx_file_path_final)
        result = [idx_file_path_final, dict_file_path_final]
        print_debug("Copying .dict.idx and .dict files... done", args.debug)
    # restore cwd and delete tmp directory unless asked to keep it
    os.chdir(cwd)
    if args.keep:
        print_info("Not deleting temp dir '%s'" % (tmp_path))
    else:
        delete_directory(tmp_path)
        print_debug("Deleted temp dir '%s'" % (tmp_path), args.debug)
    return result
def write(dictionary, args, output_file_path):
    """
    Write the given dictionary as a MOBI dictionary ebook.

    Sorts the entries by headword, groups them by prefix, writes the raw
    ebook sources via DictionaryEbook, then (unless --mobi-no-kindlegen)
    invokes kindlegen to compile the final .mobi and copies it to the
    requested output path.

    :param dictionary: dictionary object exposing ``sort`` and ``group``
    :param args: parsed CLI arguments controlling the output
    :param output_file_path: requested output file path
    :return: list with the output path (or the raw tmp dir path when
        kindlegen is skipped)
    """
    # result to be returned
    result = None
    # get absolute path
    output_file_path_absolute = os.path.abspath(output_file_path)
    # sort by headword, optionally ignoring case
    dictionary.sort(by_headword=True, ignore_case=args.sort_ignore_case)
    # create groups
    special_group, group_keys, group_dict = dictionary.group(
        prefix_function_path=args.group_by_prefix_function,
        prefix_length=int(args.group_by_prefix_length),
        merge_min_size=int(args.group_by_prefix_merge_min_size),
        merge_across_first=args.group_by_prefix_merge_across_first
    )
    all_group_keys = group_keys
    if special_group is not None:
        all_group_keys += [u"SPECIAL"]
    # create mobi object
    mobi = DictionaryEbook(ebook_format=DictionaryEbook.MOBI, args=args)
    # add groups
    for key in all_group_keys:
        if key == u"SPECIAL":
            group_entries = special_group
        else:
            group_entries = group_dict[key]
        mobi.add_group(key, group_entries)
    # create output file
    print_debug("Writing to file '%s'..." % (output_file_path_absolute), args.debug)
    mobi.write(output_file_path_absolute, compress=False)
    result = [output_file_path]
    print_debug("Writing to file '%s'... done" % (output_file_path_absolute), args.debug)
    # run kindlegen
    tmp_path = mobi.get_tmp_path()
    if args.mobi_no_kindlegen:
        print_info("Not running kindlegen, the raw files are located in '%s'" % tmp_path)
        result = [tmp_path]
    else:
        # assigned before the try so the except handler can always
        # reference it safely
        kindlegen_path = KINDLEGEN
        try:
            print_debug("Creating .mobi file with kindlegen...", args.debug)
            opf_file_path_absolute = os.path.join(tmp_path, "OEBPS", "content.opf")
            mobi_file_path_relative = u"content.mobi"
            mobi_file_path_absolute = os.path.join(tmp_path, "OEBPS", mobi_file_path_relative)
            if args.kindlegen_path is None:
                print_info(" Running '%s' from $PATH" % KINDLEGEN)
            else:
                kindlegen_path = args.kindlegen_path
                print_info(" Running '%s' from '%s'" % (KINDLEGEN, kindlegen_path))
            proc = subprocess.Popen(
                [kindlegen_path, opf_file_path_absolute, "-o", mobi_file_path_relative],
                stdout=subprocess.PIPE,
                stdin=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            output = proc.communicate()
            if args.debug:
                output_unicode = (output[0]).decode("utf-8")
                print_debug(output_unicode, args.debug)
            copy_file(mobi_file_path_absolute, output_file_path_absolute)
            result = [output_file_path]
            print_debug("Creating .mobi file with kindlegen... done", args.debug)
        except OSError:
            # kindlegen binary not found / not executable
            print_error(" Unable to run '%s' as '%s'" % (KINDLEGEN, kindlegen_path))
            print_error(" Please make sure '%s':" % KINDLEGEN)
            print_error(" 1. is available on your $PATH or")
            print_error(" 2. specify its path with --kindlegen-path")
    # delete tmp directory
    tmp_path = mobi.get_tmp_path()
    if args.keep:
        print_info("Not deleting temp dir '%s'" % (tmp_path))
    else:
        mobi.delete()
        print_debug("Deleted temp dir '%s'" % (tmp_path), args.debug)
    return result
def write(dictionary, args, output_file_path):
    """
    Write the given dictionary as a MOBI ebook.

    The entries are sorted by headword and grouped by prefix; the raw ebook
    sources are produced via DictionaryEbook, and kindlegen is then run to
    build the final .mobi (unless --mobi-no-kindlegen was given).

    :param dictionary: dictionary object exposing ``sort`` and ``group``
    :param args: parsed CLI arguments
    :param output_file_path: requested output file path
    :return: list with the produced path(s)
    """
    result = None
    output_file_path_absolute = os.path.abspath(output_file_path)
    # sort entries by headword (case sensitivity controlled by CLI flag)
    dictionary.sort(by_headword=True, ignore_case=args.sort_ignore_case)
    # partition entries into prefix groups
    special_group, group_keys, group_dict = dictionary.group(
        prefix_function_path=args.group_by_prefix_function,
        prefix_length=int(args.group_by_prefix_length),
        merge_min_size=int(args.group_by_prefix_merge_min_size),
        merge_across_first=args.group_by_prefix_merge_across_first)
    all_group_keys = group_keys
    if special_group is not None:
        all_group_keys += [u"SPECIAL"]
    # build the ebook, one group at a time
    ebook = DictionaryEbook(ebook_format=DictionaryEbook.MOBI, args=args)
    for group_key in all_group_keys:
        entries = special_group if group_key == u"SPECIAL" else group_dict[group_key]
        ebook.add_group(group_key, entries)
    # emit the raw (uncompressed) ebook sources
    print_debug("Writing to file '%s'..." % (output_file_path_absolute), args.debug)
    ebook.write(output_file_path_absolute, compress=False)
    result = [output_file_path]
    print_debug("Writing to file '%s'... done" % (output_file_path_absolute), args.debug)
    tmp_path = ebook.get_tmp_path()
    if args.mobi_no_kindlegen:
        # user asked to skip kindlegen: hand back the raw sources
        print_info("Not running kindlegen, the raw files are located in '%s'" % tmp_path)
        result = [tmp_path]
    else:
        try:
            print_debug("Creating .mobi file with kindlegen...", args.debug)
            kindlegen_path = KINDLEGEN
            opf_file_path_absolute = os.path.join(tmp_path, "OEBPS", "content.opf")
            mobi_file_path_relative = u"content.mobi"
            mobi_file_path_absolute = os.path.join(tmp_path, "OEBPS", mobi_file_path_relative)
            if args.kindlegen_path is None:
                print_info(" Running '%s' from $PATH" % KINDLEGEN)
            else:
                kindlegen_path = args.kindlegen_path
                print_info(" Running '%s' from '%s'" % (KINDLEGEN, kindlegen_path))
            kindlegen_command = [kindlegen_path, opf_file_path_absolute, "-o", mobi_file_path_relative]
            proc = subprocess.Popen(
                kindlegen_command,
                stdout=subprocess.PIPE,
                stdin=subprocess.PIPE,
                stderr=subprocess.PIPE)
            output = proc.communicate()
            if args.debug:
                output_unicode = (output[0]).decode("utf-8")
                print_debug(output_unicode, args.debug)
            copy_file(mobi_file_path_absolute, output_file_path_absolute)
            result = [output_file_path]
            print_debug("Creating .mobi file with kindlegen... done", args.debug)
        except OSError as exc:
            print_error(" Unable to run '%s' as '%s'" % (KINDLEGEN, kindlegen_path))
            print_error(" Please make sure '%s':" % KINDLEGEN)
            print_error(" 1. is available on your $PATH or")
            print_error(" 2. specify its path with --kindlegen-path")
    # clean up the temp directory unless the user asked to keep it
    tmp_path = ebook.get_tmp_path()
    if args.keep:
        print_info("Not deleting temp dir '%s'" % (tmp_path))
    else:
        ebook.delete()
        print_debug("Deleted temp dir '%s'" % (tmp_path), args.debug)
    return result
def write(dictionary, args, output_file_path):
    """
    Write the given dictionary in Bookeen format.

    Working inside a temp directory, this writes the entry definitions into
    c_* chunk files, records (headword, offset, size, chunk) rows in the
    SQLite ``.dict.idx`` index (a copy of the bundled empty index), zips the
    chunks into the ``.dict`` file, and finally either bundles both into a
    single ``.install`` zip or copies the ``.dict``/``.dict.idx`` pair next
    to the requested output path.

    :param dictionary: dictionary object exposing ``entries`` and
        ``entries_index_sorted``
    :param args: parsed CLI arguments controlling the output
    :param output_file_path: requested output file path
    :return: list of paths of the created file(s)
    """
    # result to be returned
    result = None
    # get absolute path
    output_file_path_absolute = os.path.abspath(output_file_path)
    # get absolute path for collation function file
    bookeen_collation_function_path = None
    if args.bookeen_collation_function is not None:
        bookeen_collation_function_path = os.path.abspath(args.bookeen_collation_function)
    # create tmp directory and work inside it
    cwd = os.getcwd()
    tmp_path = create_temp_directory()
    print_debug("Working in temp dir '%s'" % (tmp_path), args.debug)
    os.chdir(tmp_path)
    # get the basename
    base = os.path.basename(output_file_path)
    if base.endswith(".zip"):
        base = base[:-4]
    # copy empty.idx into tmp_path
    idx_file_path = base + u".dict.idx"
    dict_file_path = base + u".dict"
    copy_file(EMPTY_FILE_PATH, idx_file_path)
    # open index
    sql_connection = sqlite3.connect(idx_file_path)
    # install collation in the index
    collation_function = collate_function_default
    if bookeen_collation_function_path is not None:
        try:
            collation_function = imp.load_source("", bookeen_collation_function_path).collate_function
            print_debug("Using collation function from '%s'" % (bookeen_collation_function_path), args.debug)
        except Exception:
            # deliberate best-effort fallback to the default collation;
            # narrowed from a bare except so SystemExit/KeyboardInterrupt
            # are not swallowed
            print_error("Unable to load collation function from '%s'. Using the default collation function instead." % (bookeen_collation_function_path))
    sql_connection.create_collation("IcuNoCase", collation_function)
    sql_connection.text_factory = str
    # get a cursor and delete any data from the index file
    sql_cursor = sql_connection.cursor()
    sql_cursor.execute("delete from T_DictIndex")
    # write c_* files
    # each c_* file has MAX_CHUNK_SIZE < size <= (MAX_CHUNK_SIZE * 2) bytes (tentatively)
    print_debug("Writing c_* files...", args.debug)
    files_to_compress = []
    current_offset = 0
    chunk_index = 1
    chunk_file_path = "%s%d" % (CHUNK_FILE_PREFIX, chunk_index)
    files_to_compress.append(chunk_file_path)
    chunk_file_obj = open(chunk_file_path, "wb")
    for entry_index in dictionary.entries_index_sorted:
        entry = dictionary.entries[entry_index]
        definition_bytes = entry.definition.encode("utf-8")
        definition_size = len(definition_bytes)
        chunk_file_obj.write(definition_bytes)
        # insert headword into index file
        sql_tuple = (0, entry.headword, current_offset, definition_size, chunk_index)
        sql_cursor.execute("insert into T_DictIndex values (?,?,?,?,?)", sql_tuple)
        # insert synonyms into index file, pointing at the same definition
        if not args.ignore_synonyms:
            for synonym in entry.get_synonyms():
                sql_tuple = (0, synonym[0], current_offset, definition_size, chunk_index)
                sql_cursor.execute("insert into T_DictIndex values (?,?,?,?,?)", sql_tuple)
        # update offset
        current_offset += definition_size
        # if we reached CHUNK_SIZE, open the next c_* file
        if current_offset > CHUNK_SIZE:
            chunk_file_obj.close()
            chunk_index += 1
            chunk_file_path = "%s%d" % (CHUNK_FILE_PREFIX, chunk_index)
            files_to_compress.append(chunk_file_path)
            chunk_file_obj = open(chunk_file_path, "wb")
            current_offset = 0
    chunk_file_obj.close()
    print_debug("Writing c_* files... done", args.debug)
    # compress the chunks into the .dict zip
    print_debug("Compressing c_* files...", args.debug)
    with zipfile.ZipFile(dict_file_path, "w", zipfile.ZIP_DEFLATED) as file_zip_obj:
        for file_to_compress in files_to_compress:
            file_zip_obj.write(os.path.basename(file_to_compress))
    print_debug("Compressing c_* files... done", args.debug)
    # update index metadata
    print_debug("Updating index metadata...", args.debug)
    header = HEADER % (args.language_from)
    sql_cursor.execute("update T_DictInfo set F_xhtmlHeader=?", (header,))
    sql_cursor.execute("update T_DictInfo set F_LangFrom=?", (args.language_from,))
    sql_cursor.execute("update T_DictInfo set F_LangTo=?", (args.language_to,))
    sql_cursor.execute("update T_DictInfo set F_Licence=?", (args.license,))
    sql_cursor.execute("update T_DictInfo set F_Copyright=?", (args.copyright,))
    sql_cursor.execute("update T_DictInfo set F_Title=?", (args.title,))
    sql_cursor.execute("update T_DictInfo set F_Description=?", (args.description,))
    sql_cursor.execute("update T_DictInfo set F_Year=?", (args.year,))
    # the meaning of the following is unknown
    sql_cursor.execute("update T_DictInfo set F_Alphabet=?", ("Z",))
    sql_cursor.execute("update T_DictInfo set F_CollationLevel=?", ("1",))
    sql_cursor.execute("update T_DictVersion set F_DictType=?", ("stardict",))
    sql_cursor.execute("update T_DictVersion set F_Version=?", ("11",))
    print_debug("Updating index metadata... done", args.debug)
    # commit before VACUUM: on Python 3.6+ VACUUM raises inside the implicit
    # open transaction, and closing without committing would roll back all
    # the inserts/updates above
    sql_connection.commit()
    # compact and close
    sql_cursor.execute("vacuum")
    sql_cursor.close()
    sql_connection.close()
    # create .install file or copy .dict.idx and .dict into requested output directory
    parent_output_directory = os.path.split(output_file_path_absolute)[0]
    if args.bookeen_install_file:
        print_debug("Creating .install file...", args.debug)
        file_zip_path = os.path.join(parent_output_directory, base + u".install")
        with zipfile.ZipFile(file_zip_path, "w", zipfile.ZIP_DEFLATED) as file_zip_obj:
            for file_to_compress in [dict_file_path, idx_file_path]:
                file_zip_obj.write(os.path.basename(file_to_compress))
        result = [file_zip_path]
        print_debug("Creating .install file... done", args.debug)
    else:
        print_debug("Copying .dict.idx and .dict files...", args.debug)
        dict_file_path_final = os.path.join(parent_output_directory, os.path.basename(dict_file_path))
        idx_file_path_final = os.path.join(parent_output_directory, os.path.basename(idx_file_path))
        copy_file(dict_file_path, dict_file_path_final)
        copy_file(idx_file_path, idx_file_path_final)
        result = [idx_file_path_final, dict_file_path_final]
        print_debug("Copying .dict.idx and .dict files... done", args.debug)
    # restore cwd and delete tmp directory unless asked to keep it
    os.chdir(cwd)
    if args.keep:
        print_info("Not deleting temp dir '%s'" % (tmp_path))
    else:
        delete_directory(tmp_path)
        print_debug("Deleted temp dir '%s'" % (tmp_path), args.debug)
    return result