def run_export(filenames, format):
    """
    Exports the specified databases in the specified format.

    Each database is exported into the current working directory: as one
    Excel workbook if format is "xlsx_single", otherwise as one file per
    chat into a new subdirectory named after the database.

    @param   filenames  list of Skype database filenames to export
    @param   format     export format like "html", or "xlsx_single" for
                        all chats in a single Excel workbook
    """
    dbs = [skypedata.SkypeDatabase(f) for f in filenames]
    is_xlsx_single = ("xlsx_single" == format)

    for db in dbs:
        # Build output base name from template; missing account fields
        # fall back to empty strings via defaultdict.
        formatargs = collections.defaultdict(str)
        formatargs["skypename"] = os.path.basename(db.filename)
        formatargs.update(db.account or {})
        basename = util.safe_filename(conf.ExportDbTemplate % formatargs)
        dbstr = "from %s " % db if len(dbs) != 1 else ""
        if is_xlsx_single:
            export_dir = os.getcwd()
            filename = util.unique_path("%s.xlsx" % basename)
        else:
            export_dir = util.unique_path(os.path.join(os.getcwd(), basename))
            filename = format  # export_chats treats this as the format name
        target = filename if is_xlsx_single else export_dir
        try:
            print("Exporting as %s %sto %s." %
                  (format[:4].upper(), dbstr, target))
            chats = sorted(db.get_conversations(),
                           key=lambda x: x["title"].lower())
            db.get_conversations_stats(chats)
            bar_total = sum(c["message_count"] for c in chats)
            bartext = " Exporting %.*s.." % (30, db.filename) # Enforce width
            bar = ProgressBar(max=bar_total, afterword=bartext)
            bar.start()
            result = export.export_chats(chats, export_dir, filename, db,
                                         progress=bar.update)
            files, count = result
            bar.stop()
            if count:
                bar.afterword = " Exported %s to %s. " % (db, target)
                bar.update(bar_total)
                print()
                log("Exported %s %sto %s as %s.", util.plural("chat", count),
                    dbstr, target, format)
            else:
                print("\nNo messages to export%s." %
                      ("" if len(dbs) == 1 else " from %s" % db))
                # Clean up the now-empty output file or directory
                # (was a conditional expression used as a statement).
                if is_xlsx_single:
                    os.unlink(filename)
                else:
                    os.rmdir(export_dir)
        except Exception as e:
            print("Error exporting chats: %s\n\n%s" %
                  (e, traceback.format_exc()))
def run_merge(filenames):
    """
    Merges all Skype databases to a new database.

    The last database in the list is used as the base: it is copied to
    a new file named "<base>.merged.<yyyymmdd><ext>" and messages from
    all other databases are merged into that copy.

    @param   filenames  list of Skype database filenames to merge
    """
    dbs = [skypedata.SkypeDatabase(f) for f in filenames]
    db_base = dbs.pop()
    # counts[database]["chats"|"msgs"] -> number merged from that database
    counts = collections.defaultdict(lambda: collections.defaultdict(int))
    postbacks = Queue.Queue()
    # Worker posts progress and results back through the queue.
    worker = workers.MergeThread(postbacks.put)

    name, ext = os.path.splitext(os.path.split(db_base.filename)[-1])
    now = datetime.datetime.now().strftime("%Y%m%d")
    filename_final = util.unique_path("%s.merged.%s%s" % (name, now, ext))
    print("Creating %s, using %s as base." % (filename_final, db_base))
    shutil.copyfile(db_base.filename, filename_final)
    db2 = skypedata.SkypeDatabase(filename_final)
    chats2 = db2.get_conversations()
    db2.get_conversations_stats(chats2)

    for db1 in dbs:
        chats = db1.get_conversations()
        db1.get_conversations_stats(chats)
        bar_total = sum(c["message_count"] for c in chats)
        bar_text = " Processing %.*s.." % (30, db1)
        bar = ProgressBar(max=bar_total, afterword=bar_text)
        bar.start()
        args = {"db1": db1, "db2": db2, "chats": chats,
                "type": "diff_merge_left"}
        worker.work(args)
        while True:
            result = postbacks.get()
            if "error" in result:
                print("Error merging %s:\n\n%s" % (db1, result["error"]))
                worker = None # Signal for global break
                break # break while True
            if "done" in result:
                break # break while True
            if "diff" in result:
                counts[db1]["chats"] += 1
                counts[db1]["msgs"] += len(result["diff"]["messages"])
            msgcounts = sum(c["message_count"] for c in result["chats"])
            bar.update(bar.value + msgcounts)
            if result["output"]:
                log(result["output"])
        if not worker:
            break # break for db1 in dbs
        bar.stop()
        bar.afterword = " Processed %s." % db1
        bar.update(bar_total)
        print()  # original bare "print" was a no-op expression in Python 3

    if not counts:
        print("Nothing new to merge.")
        db2.close()
        os.unlink(filename_final)
    else:
        for db1 in dbs:
            print("Merged %s in %s from %s." %
                  (util.plural("message", counts[db1]["msgs"]),
                   util.plural("chat", counts[db1]["chats"]), db1))
        print("Merge into %s complete." % db2)
def make_filename(chat):
    """Returns the output file path for exporting a single chat."""
    if len(format) > 4:  # Filename already given in format
        return os.path.join(path, format)
    base = "Skype %s.%s" % (chat["title_long_lc"], format)
    return util.unique_path(os.path.join(path, util.safe_filename(base)))
def make_filename(chat):
    """Returns the output file path for exporting a single chat."""
    if len(format) > 4:  # Filename already given in format
        return os.path.join(path, format)
    args = collections.defaultdict(str)
    args.update(chat)
    base = "%s.%s" % (conf.ExportChatTemplate % args, format)
    return util.unique_path(os.path.join(path, util.safe_filename(base)))
def make_filename(chat):
    """Returns a unique output file path for exporting a single chat."""
    if len(format) > 4:  # format already carries a ready-made filename
        result = os.path.join(path, format)
    else:
        fields = collections.defaultdict(str)
        fields.update(chat)
        name = util.safe_filename(
            "%s.%s" % (conf.ExportChatTemplate % fields, format))
        result = util.unique_path(os.path.join(path, name))
    return result
def export_chat_template(chat, filename, db, messages):
    """
    Exports the chat messages to file using templates.

    @param   chat      chat data dict, as returned from SkypeDatabase
    @param   filename  full path and filename of resulting file,
                       file extension .html|.txt determines file format
    @param   db        SkypeDatabase instance
    @param   messages  list of message data dicts
    """
    tmpfile, tmpname = None, None # Temporary file for exported messages
    try:
        is_html = filename.lower().endswith(".html")
        parser = skypedata.MessageParser(db, chat=chat, stats=True)
        namespace = {"db": db, "chat": chat, "messages": messages,
                     "parser": parser}
        # As HTML and TXT contain statistics in their headers before
        # messages, write out all messages to a temporary file first,
        # statistics will be available for the main file after parsing.
        # Cannot keep all messages in memory at once - very large chats
        # (500,000+ messages) can take gigabytes.
        tmpname = util.unique_path("%s.messages" % filename)
        tmpfile = open(tmpname, "w+")
        mtemplate = templates.CHAT_MESSAGES_HTML if is_html \
                    else templates.CHAT_MESSAGES_TXT
        step.Template(mtemplate, strip=False).stream(tmpfile, namespace)

        # Parser has now seen every message: collect the aggregate
        # statistics for the header section of the main file.
        namespace["stats"] = stats = parser.get_collected_stats()
        namespace.update({
            "date1": stats["startdate"].strftime("%d.%m.%Y")
                     if stats.get("startdate") else "",
            "date2": stats["enddate"].strftime("%d.%m.%Y")
                     if stats.get("enddate") else "",
            "emoticons_used": [x for x in parser.emoticons_unique
                               if hasattr(emoticons, x)],
            "message_count": stats.get("messages", 0),
        })

        if is_html:
            # Collect chat and participant images.
            namespace.update({"participants": [],
                              "chat_picture_size": None,
                              "chat_picture_raw": None, })
            if chat["meta_picture"]:
                raw = skypedata.fix_image_raw(chat["meta_picture"])
                namespace["chat_picture_raw"] = raw
                namespace["chat_picture_size"] = util.img_size(raw)

            contacts = dict((c["skypename"], c) for c in db.get_contacts())
            partics = dict((p["identity"], p) for p in chat["participants"])
            # There can be authors not among participants, and vice versa
            for author in stats["authors"].union(partics):
                # Prefer participant's contact data, then the database
                # contact list, then a minimal stub for unknown authors.
                contact = partics.get(author, {}).get("contact")
                contact = contact or contacts.get(author, {})
                contact = contact or {"identity": author, "name": author}
                bmp = contact.get("avatar_bitmap")
                raw = contact.get("avatar_raw_small") or ""
                raw_large = contact.get("avatar_raw_large") or ""
                if not raw and not bmp:
                    raw = skypedata.get_avatar_raw(contact,
                                                   conf.AvatarImageSize)
                # An existing bitmap takes precedence over raw bytes.
                raw = bmp and util.img_wx_to_raw(bmp) or raw
                if raw:
                    raw_large = raw_large or skypedata.get_avatar_raw(
                        contact, conf.AvatarImageLargeSize)
                    contact["avatar_raw_small"] = raw
                    contact["avatar_raw_large"] = raw_large
                contact["rank"] = partics.get(author, {}).get("rank")
                namespace["participants"].append(contact)

        # Rewind the temporary file and stream its content lazily into
        # the main template, 64 KB at a time.
        tmpfile.flush(), tmpfile.seek(0)
        namespace["message_buffer"] = iter(lambda: tmpfile.read(65536), "")
        template = templates.CHAT_HTML if is_html else templates.CHAT_TXT
        with open(filename, "w") as f:
            step.Template(template, strip=False).stream(f, namespace)
    finally:
        # Best-effort cleanup: retry closing and deleting the temp file.
        if tmpfile: util.try_until(tmpfile.close)
        if tmpname: util.try_until(lambda: os.unlink(tmpname))
def export_chat_template(chat, filename, db, messages):
    """
    Exports the chat messages to file using templates.

    @param   chat      chat data dict, as returned from SkypeDatabase
    @param   filename  full path and filename of resulting file,
                       file extension .html|.txt determines file format
    @param   db        SkypeDatabase instance
    @param   messages  list of message data dicts
    """
    tmpfile, tmpname = None, None # Temporary file for exported messages
    try:
        is_html = filename.lower().endswith(".html")
        # Full statistics are collected only for HTML output.
        parser = skypedata.MessageParser(db, chat=chat, stats=is_html)
        namespace = {"db": db, "chat": chat, "messages": messages,
                     "parser": parser}

        if is_html:
            # Collect chat and participant images.
            namespace.update({"participants": [],
                              "chat_picture_size": None,
                              "chat_picture_raw": None, })
            if chat["meta_picture"]:
                raw = skypedata.fix_image_raw(chat["meta_picture"])
                # Parse image bytes only to determine pixel dimensions.
                imgparser = ImageFile.Parser(); imgparser.feed(raw)
                img = imgparser.close()
                namespace.update(chat_picture_size=img.size,
                                 chat_picture_raw=raw)
            for p in chat["participants"]:
                contact = p["contact"].copy()
                namespace["participants"].append(contact)
                contact.update(avatar_raw_small="", avatar_raw_large="")
                bmp = contact.get("avatar_bitmap")
                raw = contact.get("avatar_raw_small")
                raw_large = contact.get("avatar_raw_large")
                if not raw and not bmp:
                    raw = skypedata.get_avatar_raw(contact,
                                                   conf.AvatarImageSize)
                    if raw:
                        # Cache on the original contact dict for reuse.
                        p["contact"]["avatar_raw_small"] = raw
                # An existing bitmap takes precedence over raw bytes.
                raw = bmp and util.wx_bitmap_to_raw(bmp) or raw
                if raw:
                    if not raw_large:
                        size_large = conf.AvatarImageLargeSize
                        raw_large = skypedata.get_avatar_raw(contact,
                                                             size_large)
                        p["contact"]["avatar_raw_large"] = raw_large
                    contact["avatar_raw_small"] = raw
                    contact["avatar_raw_large"] = raw_large

        # As HTML and TXT contain statistics in their headers before
        # messages, write out all messages to a temporary file first,
        # statistics will be available for the main file after parsing.
        # Cannot keep all messages in memory at once - very large chats
        # (500,000+ messages) can take gigabytes.
        tmpname = util.unique_path("%s.messages" % filename)
        tmpfile = open(tmpname, "w+")
        mtemplate = templates.CHAT_MESSAGES_HTML if is_html \
                    else templates.CHAT_MESSAGES_TXT
        step.Template(mtemplate, strip=False).stream(tmpfile, namespace)

        # Parser has now seen every message: collect aggregate statistics
        # for the header section of the main file.
        namespace["stats"] = stats = parser.get_collected_stats()
        namespace.update({
            "date1": stats["startdate"].strftime("%d.%m.%Y")
                     if stats.get("startdate") else "",
            "date2": stats["enddate"].strftime("%d.%m.%Y")
                     if stats.get("enddate") else "",
            "emoticons_used": list(filter(lambda e: hasattr(emoticons, e),
                                          parser.emoticons_unique)),
            "message_count": stats.get("messages", 0),
        })

        # Rewind the temporary file and stream its content lazily into
        # the main template, 64 KB at a time.
        tmpfile.flush(), tmpfile.seek(0)
        namespace["message_buffer"] = iter(lambda: tmpfile.read(65536), "")
        template = templates.CHAT_HTML if is_html else templates.CHAT_TXT
        with open(filename, "w") as f:
            step.Template(template, strip=False).stream(f, namespace)
    finally:
        # Best-effort cleanup: retry closing and deleting the temp file.
        if tmpfile: util.try_until(tmpfile.close)
        if tmpname: util.try_until(lambda: os.unlink(tmpname))
def run_export(filenames, format, chatnames, authornames, ask_password):
    """
    Exports the specified databases in specified format.

    @param   filenames     list of Skype database filenames to export
    @param   format        export format like "html", or "xlsx_single" for
                           all chats in a single Excel workbook
    @param   chatnames     chat name patterns to filter by, if any
    @param   authornames   author name patterns to filter by, if any
    @param   ask_password  whether to prompt for the Skype web password,
                           used for downloading shared images in HTML export
    """
    dbs = [skypedata.SkypeDatabase(f) for f in filenames]
    is_xlsx_single = ("xlsx_single" == format)

    for db in dbs:
        if (ask_password and db.id and conf.SharedImageAutoDownload
        and format.lower().endswith("html")):
            # Prompt until a password signs in, or is left blank.
            prompt = "Enter Skype password for '%s': " % db.id
            while not skypedata.SharedImageDownload.has_login(db.id):
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore") # possible GetPassWarning
                    output(prompt, end="") # getpass output can raise errors
                    pw = getpass.getpass("", io.BytesIO())
                if not pw: continue # while
                try:
                    skypedata.SharedImageDownload.login(db.id, pw)
                except Exception as e:
                    log("Error signing in %s on Skype web.\n\n%s",
                        db.id, util.format_exc(e))
                    prompt = "%s\nEnter Skype password for '%s': " % (e, db.id)
        # Build output base name from template; missing account fields
        # fall back to empty strings via defaultdict.
        formatargs = collections.defaultdict(str)
        formatargs["skypename"] = os.path.basename(db.filename)
        formatargs.update(db.account or {})
        basename = util.safe_filename(conf.ExportDbTemplate % formatargs)
        dbstr = "from %s " % db if len(dbs) != 1 else ""
        if is_xlsx_single:
            export_dir = os.getcwd()
            filename = util.unique_path("%s.xlsx" % basename)
        else:
            export_dir = util.unique_path(os.path.join(os.getcwd(), basename))
            filename = format  # export_chats treats this as the format name
        target = filename if is_xlsx_single else export_dir
        try:
            extras = [("", chatnames)] if chatnames else []
            extras += [(" with authors", authornames)] if authornames else []
            output("Exporting%s%s as %s %sto %s." %
                   (" chats" if extras else "",
                    ",".join("%s like %s" % (x, y) for x, y in extras),
                    format[:4].upper(), dbstr, target))
            chats = sorted(db.get_conversations(chatnames, authornames),
                           key=lambda x: x["title"].lower())
            db.get_conversations_stats(chats)
            bar_total = sum(c["message_count"] for c in chats)
            bartext = " Exporting %.*s.." % (30, db.filename) # Enforce width
            bar = ProgressBar(max=bar_total, afterword=bartext)
            bar.start()
            result = export.export_chats(chats, export_dir, filename, db,
                                         progress=bar.update)
            files, count = result
            bar.stop()
            if count:
                bar.afterword = " Exported %s to %s. " % (db, target)
                bar.update(bar_total)
                output()
                log("Exported %s %sto %s as %s.", util.plural("chat", count),
                    dbstr, target, format)
            else:
                output("\nNo messages to export%s." %
                       ("" if len(dbs) == 1 else " from %s" % db))
                # Clean up the now-empty output file or directory
                # (was a conditional expression used as a statement).
                if is_xlsx_single:
                    os.unlink(filename)
                else:
                    os.rmdir(export_dir)
        except Exception as e:
            output("Error exporting chats: %s\n\n%s" %
                   (e, traceback.format_exc()))
def run_merge(filenames, output_filename=None):
    """
    Merges all Skype databases to a new database.

    The last database in the list is used as the base: it is copied to
    output_filename and messages from all other databases merged into it.

    @param   filenames        list of Skype database filenames to merge
    @param   output_filename  path of the merged database to create;
                              auto-generated from base name if not given
    """
    dbs = [skypedata.SkypeDatabase(f) for f in filenames]
    db_base = dbs.pop()
    # counts[database]["chats"|"msgs"] -> number merged from that database
    counts = collections.defaultdict(lambda: collections.defaultdict(int))
    postbacks = Queue.Queue()

    name, ext = os.path.splitext(os.path.split(db_base.filename)[-1])
    now = datetime.datetime.now().strftime("%Y%m%d")
    if not output_filename:
        output_filename = util.unique_path("%s.merged.%s%s" %
                                           (name, now, ext))
    output("Creating %s, using %s as base." % (output_filename, db_base))
    bar = ProgressBar()
    bar.start()
    shutil.copyfile(db_base.filename, output_filename)
    db2 = skypedata.SkypeDatabase(output_filename)
    chats2 = db2.get_conversations()
    db2.get_conversations_stats(chats2)

    args = {"db2": db2, "type": "diff_merge_left"}
    # Worker posts progress and results back through the queue.
    worker = workers.MergeThread(postbacks.put)
    try:
        for db1 in dbs:
            chats = db1.get_conversations()
            db1.get_conversations_stats(chats)
            bar.afterword = " Processing %.*s.." % (30, db1)
            worker.work(dict(args, db1=db1, chats=chats))
            while True:
                result = postbacks.get()
                if "error" in result:
                    output("Error merging %s:\n\n%s" %
                           (db1, result["error"]))
                    db1 = None # Signal for global break
                    break # break while True
                if "done" in result:
                    break # break while True
                if "diff" in result:
                    counts[db1]["chats"] += 1
                    counts[db1]["msgs"] += len(result["diff"]["messages"])
                if "index" in result:
                    bar.max = result["count"]
                    bar.update(result["index"])
                if result.get("output"):
                    log(result["output"])
            if not db1:
                break # break for db1 in dbs
            bar.stop()
            bar.afterword = " Processed %s." % db1
            bar.update(bar.max)
            output()
    finally:
        # Explicit statements instead of the original
        # "worker and (worker.stop(), worker.join())" side-effect expression.
        if worker:
            worker.stop()
            worker.join()

    if not counts:
        output("Nothing new to merge.")
        db2.close()
        os.unlink(output_filename)
    else:
        for db1 in dbs:
            output("Merged %s in %s from %s." %
                   (util.plural("message", counts[db1]["msgs"]),
                    util.plural("chat", counts[db1]["chats"]), db1))
        output("Merge into %s complete." % db2)
def export_chat_template(chat, filename, db, messages):
    """
    Exports the chat messages to file using templates.

    @param   chat      chat data dict, as returned from SkypeDatabase
    @param   filename  full path and filename of resulting file,
                       file extension .html|.txt determines file format
    @param   db        SkypeDatabase instance
    @param   messages  list of message data dicts
    """
    tmpfile, tmpname = None, None # Temporary file for exported messages
    try:
        is_html = filename.lower().endswith(".html")
        parser = skypedata.MessageParser(db, chat=chat, stats=True)
        namespace = {
            "db": db, "chat": chat, "messages": messages, "parser": parser
        }
        # As HTML and TXT contain statistics in their headers before
        # messages, write out all messages to a temporary file first,
        # statistics will be available for the main file after parsing.
        # Cannot keep all messages in memory at once - very large chats
        # (500,000+ messages) can take gigabytes.
        tmpname = util.unique_path("%s.messages" % filename)
        tmpfile = open(tmpname, "w+")
        mtemplate = templates.CHAT_MESSAGES_HTML if is_html \
                    else templates.CHAT_MESSAGES_TXT
        step.Template(mtemplate, strip=False).stream(tmpfile, namespace)

        # Parser has now seen every message: collect the aggregate
        # statistics for the header section of the main file.
        namespace["stats"] = stats = parser.get_collected_stats()
        namespace.update({
            "date1": stats["startdate"].strftime("%d.%m.%Y")
                     if stats.get("startdate") else "",
            "date2": stats["enddate"].strftime("%d.%m.%Y")
                     if stats.get("enddate") else "",
            "emoticons_used": [x for x in parser.emoticons_unique
                               if hasattr(emoticons, x)],
            "message_count": stats.get("messages", 0),
        })

        if is_html:
            # Collect chat and participant images.
            namespace.update({
                "participants": [],
                "chat_picture_size": None,
                "chat_picture_raw": None,
            })
            if chat["meta_picture"]:
                raw = skypedata.fix_image_raw(chat["meta_picture"])
                namespace["chat_picture_raw"] = raw
                namespace["chat_picture_size"] = util.img_size(raw)

            contacts = dict((c["skypename"], c) for c in db.get_contacts())
            partics = dict((p["identity"], p) for p in chat["participants"])
            # There can be authors not among participants, and vice versa
            for author in stats["authors"].union(partics):
                # Prefer participant's contact data, then the database
                # contact list, then a minimal stub for unknown authors.
                contact = partics.get(author, {}).get("contact")
                contact = contact or contacts.get(author, {})
                contact = contact or {"identity": author, "name": author}
                bmp = contact.get("avatar_bitmap")
                raw = contact.get("avatar_raw_small") or ""
                raw_large = contact.get("avatar_raw_large") or ""
                if not raw and not bmp:
                    raw = skypedata.get_avatar_raw(contact,
                                                   conf.AvatarImageSize)
                # An existing bitmap takes precedence over raw bytes.
                raw = bmp and util.img_wx_to_raw(bmp) or raw
                if raw:
                    raw_large = raw_large or skypedata.get_avatar_raw(
                        contact, conf.AvatarImageLargeSize)
                    contact["avatar_raw_small"] = raw
                    contact["avatar_raw_large"] = raw_large
                contact["rank"] = partics.get(author, {}).get("rank")
                namespace["participants"].append(contact)

        # Rewind the temporary file and stream its content lazily into
        # the main template, 64 KB at a time.
        tmpfile.flush(), tmpfile.seek(0)
        namespace["message_buffer"] = iter(lambda: tmpfile.read(65536), "")
        template = templates.CHAT_HTML if is_html else templates.CHAT_TXT
        with open(filename, "w") as f:
            step.Template(template, strip=False).stream(f, namespace)
    finally:
        # Best-effort cleanup: retry closing and deleting the temp file.
        if tmpfile: util.try_until(tmpfile.close)
        if tmpname: util.try_until(lambda: os.unlink(tmpname))
def export_chat_template(chat, filename, db, messages):
    """
    Exports the chat messages to file using templates.

    @param   chat      chat data dict, as returned from SkypeDatabase
    @param   filename  full path and filename of resulting file,
                       file extension .html|.txt determines file format
    @param   db        SkypeDatabase instance
    @param   messages  list of message data dicts
    """
    tmpfile, tmpname = None, None # Temporary file for exported messages
    try:
        is_html = filename.lower().endswith(".html")
        # Full statistics are collected only for HTML output.
        parser = skypedata.MessageParser(db, chat=chat, stats=is_html)
        namespace = {
            "db": db, "chat": chat, "messages": messages, "parser": parser
        }

        if is_html:
            # Collect chat and participant images.
            namespace.update({
                "participants": [],
                "chat_picture_size": None,
                "chat_picture_raw": None,
            })
            if chat["meta_picture"]:
                raw = skypedata.fix_image_raw(chat["meta_picture"])
                # Parse image bytes only to determine pixel dimensions.
                imgparser = ImageFile.Parser()
                imgparser.feed(raw)
                img = imgparser.close()
                namespace.update(chat_picture_size=img.size,
                                 chat_picture_raw=raw)
            for p in chat["participants"]:
                contact = p["contact"].copy()
                namespace["participants"].append(contact)
                contact.update(avatar_raw_small="", avatar_raw_large="")
                bmp = contact.get("avatar_bitmap")
                raw = contact.get("avatar_raw_small")
                raw_large = contact.get("avatar_raw_large")
                if not raw and not bmp:
                    raw = skypedata.get_avatar_raw(contact,
                                                   conf.AvatarImageSize)
                    if raw:
                        # Cache on the original contact dict for reuse.
                        p["contact"]["avatar_raw_small"] = raw
                # An existing bitmap takes precedence over raw bytes.
                raw = bmp and util.wx_bitmap_to_raw(bmp) or raw
                if raw:
                    if not raw_large:
                        size_large = conf.AvatarImageLargeSize
                        raw_large = skypedata.get_avatar_raw(
                            contact, size_large)
                        p["contact"]["avatar_raw_large"] = raw_large
                    contact["avatar_raw_small"] = raw
                    contact["avatar_raw_large"] = raw_large

        # As HTML and TXT contain statistics in their headers before
        # messages, write out all messages to a temporary file first,
        # statistics will be available for the main file after parsing.
        # Cannot keep all messages in memory at once - very large chats
        # (500,000+ messages) can take gigabytes.
        tmpname = util.unique_path("%s.messages" % filename)
        tmpfile = open(tmpname, "w+")
        mtemplate = templates.CHAT_MESSAGES_HTML if is_html \
                    else templates.CHAT_MESSAGES_TXT
        step.Template(mtemplate, strip=False).stream(tmpfile, namespace)

        # Parser has now seen every message: collect aggregate statistics
        # for the header section of the main file.
        namespace["stats"] = stats = parser.get_collected_stats()
        namespace.update({
            "date1": stats["startdate"].strftime("%d.%m.%Y")
                     if stats.get("startdate") else "",
            "date2": stats["enddate"].strftime("%d.%m.%Y")
                     if stats.get("enddate") else "",
            "emoticons_used": list(
                filter(lambda e: hasattr(emoticons, e),
                       parser.emoticons_unique)),
            "message_count": stats.get("messages", 0),
        })

        # Rewind the temporary file and stream its content lazily into
        # the main template, 64 KB at a time.
        tmpfile.flush(), tmpfile.seek(0)
        namespace["message_buffer"] = iter(lambda: tmpfile.read(65536), "")
        template = templates.CHAT_HTML if is_html else templates.CHAT_TXT
        with open(filename, "w") as f:
            step.Template(template, strip=False).stream(f, namespace)
    finally:
        # Best-effort cleanup: retry closing and deleting the temp file.
        if tmpfile: util.try_until(tmpfile.close)
        if tmpname: util.try_until(lambda: os.unlink(tmpname))
def run_export(filenames, format, chatnames, authornames, ask_password):
    """
    Exports the specified databases in specified format.

    @param   filenames     list of Skype database filenames to export
    @param   format        export format like "html", or "xlsx_single" for
                           all chats in a single Excel workbook
    @param   chatnames     chat name patterns to filter by, if any
    @param   authornames   author name patterns to filter by, if any
    @param   ask_password  whether to prompt for the Skype web password,
                           used for downloading shared images in HTML export
    """
    dbs = [skypedata.SkypeDatabase(f) for f in filenames]
    is_xlsx_single = ("xlsx_single" == format)

    for db in dbs:
        if (ask_password and db.id and conf.SharedImageAutoDownload
        and format.lower().endswith("html")):
            # Prompt until a password signs in, or is left blank.
            prompt = "Enter Skype password for '%s': " % db.id
            while not skypedata.SharedImageDownload.has_login(db.id):
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore") # possible GetPassWarning
                    output(prompt, end="") # getpass output can raise errors
                    pw = getpass.getpass("", io.BytesIO())
                if not pw: continue # while
                try:
                    skypedata.SharedImageDownload.login(db.id, pw)
                except Exception as e:
                    log("Error signing in %s on Skype web.\n\n%s",
                        db.id, util.format_exc(e))
                    prompt = "%s\nEnter Skype password for '%s': " % (e, db.id)
        # Build output base name from template; missing account fields
        # fall back to empty strings via defaultdict.
        formatargs = collections.defaultdict(str)
        formatargs["skypename"] = os.path.basename(db.filename)
        formatargs.update(db.account or {})
        basename = util.safe_filename(conf.ExportDbTemplate % formatargs)
        dbstr = "from %s " % db if len(dbs) != 1 else ""
        if is_xlsx_single:
            export_dir = os.getcwd()
            filename = util.unique_path("%s.xlsx" % basename)
        else:
            export_dir = util.unique_path(os.path.join(os.getcwd(), basename))
            filename = format  # export_chats treats this as the format name
        target = filename if is_xlsx_single else export_dir
        try:
            extras = [("", chatnames)] if chatnames else []
            extras += [(" with authors", authornames)] if authornames else []
            output("Exporting%s%s as %s %sto %s." %
                   (" chats" if extras else "",
                    ",".join("%s like %s" % (x, y) for x, y in extras),
                    format[:4].upper(), dbstr, target))
            chats = sorted(db.get_conversations(chatnames, authornames),
                           key=lambda x: x["title"].lower())
            db.get_conversations_stats(chats)
            bar_total = sum(c["message_count"] for c in chats)
            bartext = " Exporting %.*s.." % (30, db.filename) # Enforce width
            bar = ProgressBar(max=bar_total, afterword=bartext)
            bar.start()
            result = export.export_chats(chats, export_dir, filename, db,
                                         progress=bar.update)
            files, count = result
            bar.stop()
            if count:
                bar.afterword = " Exported %s to %s. " % (db, target)
                bar.update(bar_total)
                output()
                log("Exported %s %sto %s as %s.", util.plural("chat", count),
                    dbstr, target, format)
            else:
                output("\nNo messages to export%s." %
                       ("" if len(dbs) == 1 else " from %s" % db))
                # Clean up the now-empty output file or directory
                # (was a conditional expression used as a statement).
                if is_xlsx_single:
                    os.unlink(filename)
                else:
                    os.rmdir(export_dir)
        except Exception as e:
            output("Error exporting chats: %s\n\n%s" %
                   (e, traceback.format_exc()))