def get_iphoto_or_aperture_pictures(plistpath: Path, photo_class):
    """Return the photos listed in an iPhoto or Aperture library plist.

    The structure of iPhoto and Aperture libraries for the base photo list is
    exactly the same, so a single loader serves both.

    Args:
        plistpath: Path of the library's XML plist file.
        photo_class: Callable invoked as ``photo_class(photo_path, key)`` for
            every image entry found.

    Returns:
        A list holding one ``photo_class`` result per entry of the plist's
        "Master Image List" whose "MediaType" is "Image". An empty list is
        returned when ``plistpath`` does not exist.

    Raises:
        Exception: Any error raised by the plist parser is re-raised after the
            parser's ``lastdata`` has been logged.
    """
    if not plistpath.exists():
        return []
    # Close the file deterministically rather than leaving it to the garbage collector.
    with plistpath.open("rt", encoding="utf-8") as fp:
        s = fp.read()
    # There was a case where a guy had 0x10 chars in his plist, causing expat errors on loading
    s = remove_invalid_xml(s, replace_with="")
    # It seems that iPhoto sometimes doesn't properly escape & chars. The regexp below is to find
    # any & char that is not a &-based entity (&amp;, &quot;, etc.). based on TextMate's XML
    # bundle's regexp.
    # The alternatives are grouped so that the trailing ";" is required by all of them. Without
    # the group, ";" only applied to the hexadecimal alternative and a bare "&" followed by any
    # name character (e.g. "AT&T") was never detected.
    s, count = re.subn(r"&(?!(?:[a-zA-Z0-9_-]+|#[0-9]+|#x[0-9a-fA-F]+);)", "", s)
    if count:
        logging.warning("%d invalid XML entities replacement made", count)
    parser = IPhotoPlistParser()
    try:
        plist = parser.parse(io.BytesIO(s.encode("utf-8")))
    except Exception:
        logging.warning("iPhoto plist parsing choked on data: %r", parser.lastdata)
        raise
    return [
        photo_class(Path(photo_data["ImagePath"]), key)
        for key, photo_data in plist["Master Image List"].items()
        if photo_data["MediaType"] == "Image"
    ]
def get_iphoto_or_aperture_pictures(plistpath: Path, photo_class):
    """Return the photos listed in an iPhoto or Aperture library plist.

    The structure of iPhoto and Aperture libraries for the base photo list is
    exactly the same, so a single loader serves both.

    Args:
        plistpath: Path of the library's XML plist file.
        photo_class: Callable invoked as ``photo_class(photo_path, key)`` for
            every image entry found.

    Returns:
        A list holding one ``photo_class`` result per entry of the plist's
        'Master Image List' whose 'MediaType' is 'Image'. An empty list is
        returned when ``plistpath`` does not exist.

    Raises:
        Exception: Any error raised by the plist parser is re-raised after the
            parser's ``lastdata`` has been logged.
    """
    if not plistpath.exists():
        return []
    # Close the file deterministically rather than leaving it to the garbage collector.
    with plistpath.open('rt', encoding='utf-8') as fp:
        s = fp.read()
    # There was a case where a guy had 0x10 chars in his plist, causing expat errors on loading
    s = remove_invalid_xml(s, replace_with='')
    # It seems that iPhoto sometimes doesn't properly escape & chars. The regexp below is to find
    # any & char that is not a &-based entity (&amp;, &quot;, etc.). based on TextMate's XML
    # bundle's regexp.
    # The alternatives are grouped so that the trailing ';' is required by all of them. Without
    # the group, ';' only applied to the hexadecimal alternative and a bare '&' followed by any
    # name character (e.g. 'AT&T') was never detected.
    s, count = re.subn(r'&(?!(?:[a-zA-Z0-9_-]+|#[0-9]+|#x[0-9a-fA-F]+);)', '', s)
    if count:
        logging.warning("%d invalid XML entities replacement made", count)
    parser = IPhotoPlistParser()
    try:
        plist = parser.parse(io.BytesIO(s.encode('utf-8')))
    except Exception:
        logging.warning("iPhoto plist parsing choked on data: %r", parser.lastdata)
        raise
    return [
        photo_class(Path(photo_data['ImagePath']), key)
        for key, photo_data in plist['Master Image List'].items()
        if photo_data['MediaType'] == 'Image'
    ]
def fix_text(text):
    """Clean up `text` and return the result.

    Three things happen, in this order: invalid XML characters are removed,
    the '&dquo;' pseudo-entity is turned into a plain double quote, and
    whitespace is normalized.
    """
    # Invalid XML characters have to go, otherwise we get crashes on ebook creation and when
    # reloading masherproj files.
    cleaned = remove_invalid_xml(text)
    # Heuristic found in sample pdfs: '&dquo;' shows up in a pdf that has quotes in it. It looks
    # like an html escape, but it isn't one, so we simply swap it for a quote character.
    cleaned = cleaned.replace('&dquo;', '"')
    # Space normalization: collapse repeated spaces first, then drop spaces that sit right
    # before or after a newline.
    without_double_spaces = RE_MULTIPLE_SPACES.sub(' ', cleaned)
    return RE_NEWLINE_AND_SPACE.sub('\n', without_double_spaces)
def get_itunes_songs(plistpath):
    """Return the songs of an iTunes library plist that exist on disk.

    Args:
        plistpath: Path of the iTunes XML plist file.

    Returns:
        A list of ``ITunesSong`` instances, one per entry of the plist's
        'Tracks' mapping whose 'Track Type' is 'File' and whose ``path``
        exists. Entries that raise ``KeyError`` while being read are skipped.
        An empty list is returned when ``plistpath`` does not exist.
    """
    if not plistpath.exists():
        return []
    # Close the file deterministically rather than leaving it to the garbage collector.
    with plistpath.open('rt', encoding='utf-8') as fp:
        s = fp.read()
    # iTunes sometimes produces XML files with invalid characters in it.
    s = remove_invalid_xml(s, replace_with='')
    # plistlib.readPlistFromBytes() was removed in Python 3.9. Use loads() where it exists
    # (Python 3.4+) and only fall back to the old name on older interpreters.
    load_plist = getattr(plistlib, 'loads', None) or plistlib.readPlistFromBytes
    plist = load_plist(s.encode('utf-8'))
    result = []
    for song_data in plist['Tracks'].values():
        try:
            if song_data['Track Type'] != 'File':
                continue
            song = ITunesSong(song_data)
        except KeyError:
            # No "Track Type", "Location" or "Track ID" key in track
            continue
        if song.path.exists():
            result.append(song)
    return result
def get_iphoto_or_aperture_pictures(plistpath, photo_class):
    """Return the photos listed in an iPhoto or Aperture library plist.

    The structure of iPhoto and Aperture libraries for the base photo list is
    exactly the same, so a single loader serves both.

    Args:
        plistpath: Path of the library's XML plist file.
        photo_class: Callable invoked as ``photo_class(photo_path, key)`` for
            every image entry found.

    Returns:
        A list holding one ``photo_class`` result per entry of the plist's
        'Master Image List' whose 'MediaType' is 'Image'. An empty list is
        returned when ``plistpath`` does not exist.
    """
    if not io.exists(plistpath):
        return []
    # NOTE(review): assumes the object returned by io.open() has a close() method, like a
    # regular file object -- confirm against the io module used by this file.
    fp = io.open(plistpath, 'rt', encoding='utf-8')
    try:
        s = fp.read()
    finally:
        # Release the handle right away instead of leaving it to the garbage collector.
        fp.close()
    # There was a case where a guy had 0x10 chars in his plist, causing expat errors on loading
    s = remove_invalid_xml(s, replace_with='')
    # It seems that iPhoto sometimes doesn't properly escape & chars. The regexp below is to find
    # any & char that is not a &-based entity (&amp;, &quot;, etc.). based on TextMate's XML
    # bundle's regexp.
    # The alternatives are grouped so that the trailing ';' is required by all of them. Without
    # the group, ';' only applied to the hexadecimal alternative and a bare '&' followed by any
    # name character (e.g. 'AT&T') was never detected.
    s, count = re.subn(r'&(?!(?:[a-zA-Z0-9_-]+|#[0-9]+|#x[0-9a-fA-F]+);)', '', s)
    if count:
        logging.warning("%d invalid XML entities replacement made", count)
    # plistlib.readPlistFromBytes() was removed in Python 3.9. Use loads() where it exists
    # (Python 3.4+) and only fall back to the old name on older interpreters.
    load_plist = getattr(plistlib, 'loads', None) or plistlib.readPlistFromBytes
    plist = load_plist(s.encode('utf-8'))
    return [
        photo_class(Path(photo_data['ImagePath']), key)
        for key, photo_data in plist['Master Image List'].items()
        if photo_data['MediaType'] == 'Image'
    ]
def save(filename, document_id, properties, accounts, groups, transactions, schedules, budgets):
    """Write a document to `filename` as a 'moneyguru-file' XML tree.

    The root element gets a 'document_id' attribute and, in this order, one
    'properties' element followed by a 'group', 'account', 'transaction',
    'recurrence' and 'budget' element for each item of the matching argument.
    Every attribute value is passed through ``remove_invalid_xml()`` before
    the tree is written, and the folder containing `filename` is passed to
    ``ensure_folder()`` before the file is opened.

    Args:
        filename: Path of the file to write; opened as UTF-8 text.
        document_id: Value of the root element's 'document_id' attribute.
        properties: Mapping of property names to values. The value stored
            under 'default_currency' is saved through its ``code`` attribute,
            all the others through ``str()``.
        accounts: Iterable of accounts to save.
        groups: Iterable of account groups to save.
        transactions: Iterable of transactions to save.
        schedules: Iterable of recurrences; only those whose ``is_alive`` is
            true are saved.
        budgets: Iterable of budgets to save.
    """
    def date2str(date):
        return date.strftime('%Y-%m-%d')

    def handle_newlines(s):
        # etree doesn't correctly save newlines. In fields that allow it, we have to escape them so
        # that we can restore them during load.
        # XXX It seems like newer version of etree do escape newlines. When we use Python 3.2, we
        # can probably remove this.
        if not s:
            return s
        return s.replace('\n', '\\n')

    def setattrib(attribs, attribname, value):
        # Falsy values are not written at all.
        if value:
            attribs[attribname] = value

    def write_transaction_element(parent_element, transaction):
        transaction_element = ET.SubElement(parent_element, 'transaction')
        attrib = transaction_element.attrib
        attrib['date'] = date2str(transaction.date)
        setattrib(attrib, 'description', transaction.description)
        setattrib(attrib, 'payee', transaction.payee)
        setattrib(attrib, 'checkno', transaction.checkno)
        setattrib(attrib, 'notes', handle_newlines(transaction.notes))
        attrib['mtime'] = str(int(transaction.mtime))
        for split in transaction.splits:
            split_element = ET.SubElement(transaction_element, 'split')
            attrib = split_element.attrib
            attrib['account'] = split.account_name
            attrib['amount'] = format_amount(split.amount)
            setattrib(attrib, 'memo', split.memo)
            setattrib(attrib, 'reference', split.reference)
            if split.reconciliation_date is not None:
                attrib['reconciliation_date'] = date2str(split.reconciliation_date)

    root = ET.Element('moneyguru-file')
    root.attrib['document_id'] = document_id
    props_element = ET.SubElement(root, 'properties')
    for name, value in properties.items():
        if name == 'default_currency':
            value = value.code
        else:
            value = str(value)
        props_element.attrib[name] = value
    for group in groups:
        group_element = ET.SubElement(root, 'group')
        attrib = group_element.attrib
        attrib['name'] = group.name
        attrib['type'] = group.type
    for account in accounts:
        account_element = ET.SubElement(root, 'account')
        attrib = account_element.attrib
        attrib['name'] = account.name
        attrib['currency'] = account.currency.code
        attrib['type'] = account.type
        if account.group:
            attrib['group'] = account.group.name
        if account.reference is not None:
            attrib['reference'] = account.reference
        if account.account_number:
            attrib['account_number'] = account.account_number
        if account.inactive:
            attrib['inactive'] = 'y'
        if account.notes:
            attrib['notes'] = handle_newlines(account.notes)
    for transaction in transactions:
        write_transaction_element(root, transaction)
    # the functionality of the line below is untested because it's an optimisation
    scheduled = [s for s in schedules if s.is_alive]
    for recurrence in scheduled:
        recurrence_element = ET.SubElement(root, 'recurrence')
        attrib = recurrence_element.attrib
        attrib['type'] = recurrence.repeat_type
        attrib['every'] = str(recurrence.repeat_every)
        if recurrence.stop_date is not None:
            attrib['stop_date'] = date2str(recurrence.stop_date)
        for date, change in recurrence.date2globalchange.items():
            change_element = ET.SubElement(recurrence_element, 'change')
            change_element.attrib['date'] = date2str(date)
            if change is not None:
                write_transaction_element(change_element, change)
        for date, exception in recurrence.date2exception.items():
            exception_element = ET.SubElement(recurrence_element, 'exception')
            exception_element.attrib['date'] = date2str(date)
            if exception is not None:
                write_transaction_element(exception_element, exception)
        write_transaction_element(recurrence_element, recurrence.ref)
    for budget in budgets:
        budget_element = ET.SubElement(root, 'budget')
        attrib = budget_element.attrib
        attrib['account'] = budget.account.name
        attrib['type'] = budget.repeat_type
        attrib['every'] = str(budget.repeat_every)
        attrib['amount'] = format_amount(budget.amount)
        attrib['notes'] = budget.notes
        if budget.target is not None:
            attrib['target'] = budget.target.name
        attrib['start_date'] = date2str(budget.start_date)
        if budget.stop_date is not None:
            attrib['stop_date'] = date2str(budget.stop_date)
    # Element.getiterator() was removed in Python 3.9; iter() is its replacement. The old name
    # is only used when iter() is not available.
    elements = root.iter() if hasattr(root, 'iter') else root.getiterator()
    for elem in elements:
        attrib = elem.attrib
        # Iterate over a snapshot because values are reassigned inside the loop.
        for key, value in list(attrib.items()):
            attrib[key] = remove_invalid_xml(value)
    tree = ET.ElementTree(root)
    ensure_folder(op.dirname(filename))
    # The with block guarantees that the file is flushed and closed, even if writing fails.
    with open(filename, 'wt', encoding='utf-8') as fp:
        fp.write('<?xml version="1.0" encoding="utf-8"?>\n')
        # This 'unicode' encoding thing is only available (and necessary) from Python 3.2.
        # Compare the whole version tuple, not just the minor number.
        if sys.version_info >= (3, 2):
            tree.write(fp, encoding='unicode')
        else:
            tree.write(fp)
def save(filename, document_id, properties, accounts, groups, transactions, schedules, budgets):
    """Write a document to `filename` as a 'moneyguru-file' XML tree.

    The root element gets a 'document_id' attribute and, in this order, one
    'properties' element followed by a 'group', 'account', 'transaction',
    'recurrence' and 'budget' element for each item of the matching argument.
    Every attribute value is passed through ``remove_invalid_xml()`` before
    the tree is written, and the folder containing `filename` is passed to
    ``ensure_folder()`` before the file is opened.

    Args:
        filename: Path of the file to write; opened as UTF-8 text.
        document_id: Value of the root element's 'document_id' attribute.
        properties: Mapping of property names to values. The value stored
            under 'default_currency' is saved through its ``code`` attribute,
            all the others through ``str()``.
        accounts: Iterable of accounts to save.
        groups: Iterable of account groups to save.
        transactions: Iterable of transactions to save.
        schedules: Iterable of recurrences; only those whose ``is_alive`` is
            true are saved.
        budgets: Iterable of budgets to save.
    """
    def date2str(date):
        return date.strftime('%Y-%m-%d')

    def handle_newlines(s):
        # etree doesn't correctly save newlines. In fields that allow it, we have to escape them so
        # that we can restore them during load.
        # XXX It seems like newer version of etree do escape newlines. When we use Python 3.2, we
        # can probably remove this.
        if not s:
            return s
        return s.replace('\n', '\\n')

    def setattrib(attribs, attribname, value):
        # Falsy values are not written at all.
        if value:
            attribs[attribname] = value

    def write_transaction_element(parent_element, transaction):
        transaction_element = ET.SubElement(parent_element, 'transaction')
        attrib = transaction_element.attrib
        attrib['date'] = date2str(transaction.date)
        setattrib(attrib, 'description', transaction.description)
        setattrib(attrib, 'payee', transaction.payee)
        setattrib(attrib, 'checkno', transaction.checkno)
        setattrib(attrib, 'notes', handle_newlines(transaction.notes))
        attrib['mtime'] = str(int(transaction.mtime))
        for split in transaction.splits:
            split_element = ET.SubElement(transaction_element, 'split')
            attrib = split_element.attrib
            attrib['account'] = split.account_name
            attrib['amount'] = format_amount(split.amount)
            setattrib(attrib, 'memo', split.memo)
            setattrib(attrib, 'reference', split.reference)
            if split.reconciliation_date is not None:
                attrib['reconciliation_date'] = date2str(split.reconciliation_date)

    root = ET.Element('moneyguru-file')
    root.attrib['document_id'] = document_id
    props_element = ET.SubElement(root, 'properties')
    for name, value in properties.items():
        if name == 'default_currency':
            value = value.code
        else:
            value = str(value)
        props_element.attrib[name] = value
    for group in groups:
        group_element = ET.SubElement(root, 'group')
        attrib = group_element.attrib
        attrib['name'] = group.name
        attrib['type'] = group.type
    for account in accounts:
        account_element = ET.SubElement(root, 'account')
        attrib = account_element.attrib
        attrib['name'] = account.name
        attrib['currency'] = account.currency.code
        attrib['type'] = account.type
        if account.group:
            attrib['group'] = account.group.name
        if account.reference is not None:
            attrib['reference'] = account.reference
        if account.account_number:
            attrib['account_number'] = account.account_number
        if account.inactive:
            attrib['inactive'] = 'y'
        if account.notes:
            attrib['notes'] = handle_newlines(account.notes)
    for transaction in transactions:
        write_transaction_element(root, transaction)
    # the functionality of the line below is untested because it's an optimisation
    scheduled = [s for s in schedules if s.is_alive]
    for recurrence in scheduled:
        recurrence_element = ET.SubElement(root, 'recurrence')
        attrib = recurrence_element.attrib
        attrib['type'] = recurrence.repeat_type
        attrib['every'] = str(recurrence.repeat_every)
        if recurrence.stop_date is not None:
            attrib['stop_date'] = date2str(recurrence.stop_date)
        for date, change in recurrence.date2globalchange.items():
            change_element = ET.SubElement(recurrence_element, 'change')
            change_element.attrib['date'] = date2str(date)
            if change is not None:
                write_transaction_element(change_element, change)
        for date, exception in recurrence.date2exception.items():
            exception_element = ET.SubElement(recurrence_element, 'exception')
            exception_element.attrib['date'] = date2str(date)
            if exception is not None:
                write_transaction_element(exception_element, exception)
        write_transaction_element(recurrence_element, recurrence.ref)
    for budget in budgets:
        budget_element = ET.SubElement(root, 'budget')
        attrib = budget_element.attrib
        attrib['account'] = budget.account.name
        attrib['type'] = budget.repeat_type
        attrib['every'] = str(budget.repeat_every)
        attrib['amount'] = format_amount(budget.amount)
        attrib['notes'] = budget.notes
        if budget.target is not None:
            attrib['target'] = budget.target.name
        attrib['start_date'] = date2str(budget.start_date)
        if budget.stop_date is not None:
            attrib['stop_date'] = date2str(budget.stop_date)
    # Element.getiterator() was removed in Python 3.9; iter() is its replacement. The old name
    # is only used when iter() is not available.
    elements = root.iter() if hasattr(root, 'iter') else root.getiterator()
    for elem in elements:
        attrib = elem.attrib
        # Iterate over a snapshot because values are reassigned inside the loop.
        for key, value in list(attrib.items()):
            attrib[key] = remove_invalid_xml(value)
    tree = ET.ElementTree(root)
    ensure_folder(op.dirname(filename))
    # The with block guarantees that the file is flushed and closed, even if writing fails.
    with open(filename, 'wt', encoding='utf-8') as fp:
        fp.write('<?xml version="1.0" encoding="utf-8"?>\n')
        # This 'unicode' encoding thing is only available (and necessary) from Python 3.2.
        # Compare the whole version tuple, not just the minor number.
        if sys.version_info >= (3, 2):
            tree.write(fp, encoding='unicode')
        else:
            tree.write(fp)