def parseFile(fp):
    """Build a folder/bookmark tree from the records of ``iterRecords(fp)``.

    Each record is a ``(lineno, type, name, attrs)`` tuple.  FOLDER records
    open a new folder, DASH records close the most recently opened one, URL
    records become ``import_util.Bookmark`` children of the current folder
    and SEPERATOR records are ignored.  A folder whose attrs contain
    ``'trashfolder'`` is dropped together with everything nested inside it.

    Records without a name are logged and skipped.  The children of a
    nameless folder are attached to the enclosing folder.

    Returns the root ``import_util.Folder`` (created with an empty name).

    Raises RuntimeError when a DASH record has no open folder to close.
    """
    root_folder = import_util.Folder('')
    # The last element is the folder currently receiving children.
    folder_stack = [root_folder]
    records = iterRecords(fp)
    for lineno, rec_type, name, attrs in records:
        if rec_type == FOLDER:
            if 'trashfolder' in attrs:
                # Skip everything under the trash folder.  The same iterator
                # is consumed here, so the outer loop resumes right after the
                # DASH that closes the trash folder.
                depth = 1
                for _lineno, skipped_type, skipped_name, _attrs in records:
                    if skipped_type == FOLDER:
                        depth += 1
                        log.info('drop %s', skipped_name)
                    elif skipped_type == DASH:
                        depth -= 1
                        if depth == 0:
                            break
                continue
            if not name:
                log.warn('Invalid name line %s', lineno+1)
                # The nameless folder is still closed by its own DASH.  Push
                # the current parent as a stand-in so that DASH pops this
                # entry instead of the enclosing folder; the children keep
                # landing in the parent.
                folder_stack.append(folder_stack[-1])
                continue
            folder = import_util.Folder(name)
            folder_stack[-1].children.append(folder)
            folder_stack.append(folder)
        elif rec_type == URL:
            if not name:
                log.warn('Invalid name line %s', lineno+1)
                continue
            created = attrs.get('created', '')
            created = import_util._ctime_str_2_iso8601(created)
            # Opera doesn't have modified. Map visited to modified.
            modified = attrs.get('visited', '')
            modified = import_util._ctime_str_2_iso8601(modified)
            page = import_util.Bookmark(
                name,
                url=attrs.get('url', ''),
                description=attrs.get('description', ''),
                created=created,
                modified=modified,
            )
            folder_stack[-1].children.append(page)
        elif rec_type == SEPERATOR:
            pass
        elif rec_type == DASH:
            # The root folder is never closed by a DASH.
            if len(folder_stack) <= 1:
                raise RuntimeError('Unmatched "-" line: %s' % (lineno+1,))
            folder_stack.pop()
    return root_folder
def parseLink(tokens, attrs):
    """Consume the body of an ``<a>`` element and return its details.

    ``attrs`` holds the attributes of the opening tag; ``href``,
    ``last_modified`` and ``add_date`` are read from it via ``_get_attr``,
    and the two dates are passed through
    ``import_util._ctime_str_2_iso8601``.

    ``tokens`` yields ``(kind, data, attrs)`` triples and must support
    ``push_back``.  DATA tokens are collected as the title until the closing
    ``a`` tag.  If a ``dl``/``dt``/``dd`` token shows up first, the markup is
    malformed: that token is pushed back for the caller and scanning stops.

    Returns ``(title, url, add_date, last_modified)``.
    """
    # <a>
    href = _get_attr(attrs, 'href')
    modified_raw = _get_attr(attrs, 'last_modified')
    added_raw = _get_attr(attrs, 'add_date')
    modified_iso = import_util._ctime_str_2_iso8601(modified_raw)
    added_iso = import_util._ctime_str_2_iso8601(added_raw)

    title_parts = []
    for kind, data, token_attrs in tokens:
        if kind == DATA:
            title_parts.append(data)
            continue
        if kind == ENDTAG and data == 'a':
            break
        if data in ('dl', 'dt', 'dd'):
            # malformed! hand the token back so the caller can process it
            tokens.push_back((kind, data, token_attrs))
            break

    return _join_text(title_parts), href, added_iso, modified_iso