def sync_folders(self) -> None:
    """Sync WizNote directories to Joplin folders.

    In WizNote a directory is not a resource: it is defined directly in the
    configuration file and exists in the database only as the ``location``
    field. In Joplin a folder is a standard resource, see
    https://joplinapp.org/api/references/rest_api/#item-type-ids

    Raises:
        ValueError: if the parent of a pending location is missing from
            ``self.cu.l2f_cache`` or has no Joplin id yet.
    """
    pending = self.cu.get_waiting_for_created_l2f()
    logger.info(f'有 {len(pending)} 个 folder 等待同步。')

    for l2f in pending:
        logger.info(f'处理 location {l2f.location}')
        parent_location = l2f.parent_location

        # Level-1 locations have no parent object.
        if parent_location is None:
            created = self.jda.post_folder(title=l2f.title)
            self.cu.update_l2f(l2f.location, created.id)
            continue

        parent: Location2Folder = self.cu.l2f_cache.get(parent_location)
        problem = None
        if parent is None:
            problem = f'找不到父对象 {parent_location}!'
        elif parent.id is None:
            problem = f'父对象 {parent_location} 没有 id!'
        if problem is not None:
            logger.error(problem)
            raise ValueError(problem)

        created = self.jda.post_folder(title=l2f.title, parent_id=parent.id)
        self.cu.update_l2f(l2f.location, created.id, created.parent_id)

    # Once l2f_cache has been updated, the folders must be reloaded.
    self.cu.load_folders()
def load_internal_links(self) -> None:
    """Rebuild ``self.internal_links`` from every row of the ``internal_link`` table.

    Each row is turned into a ``JoplinInternalLink`` and stored under the
    link object's own ``id``.
    """
    rows = self.conn.execute(
        'SELECT note_id, resource_id, title, link_type FROM internal_link;'
    ).fetchall()
    logger.info(f'在数据库 internal_link 中找到 {len(rows)} 条内链记录。')

    self.internal_links = {}
    for link in (JoplinInternalLink(*row) for row in rows):
        self.internal_links[link.id] = link
def load_resources(self) -> None:
    """Rebuild ``self.resources`` from every row of the ``resource`` table.

    Each row is turned into a ``JoplinResource`` and stored under the
    resource object's own ``id``.
    """
    rows = self.conn.execute(
        'SELECT resource_id, title, filename, created_time, resource_type FROM resource;'
    ).fetchall()
    logger.info(f'在数据库 resource 中找到 {len(rows)} 条记录。')

    self.resources = {}
    for resource in (JoplinResource(*row) for row in rows):
        self.resources[resource.id] = resource
def sync_all(self) -> None:
    """Sync everything: folders first, then tags, then every WizNote document."""
    self.sync_folders()
    self.sync_tags()

    documents = self.ws.documents
    logger.info(f'为知笔记转换所有文档 {len(documents)} 篇。')
    for document in documents:
        self._sync_note(document)
def load_tags(self) -> None:
    """Load the already-created tags from the ``tag`` table into ``self.tags``.

    The mapping is keyed by the first selected column (``tag_id``).
    """
    rows = self.conn.execute(
        'SELECT tag_id, title, created_time, updated_time FROM tag;'
    ).fetchall()
    logger.info(f'在数据库 tag 中找到 {len(rows)} 条记录。')

    self.tags = {}
    for row in rows:
        tag = JoplinTag(*row)
        self.tags[row[0]] = tag
def get_note(self, guid: str) -> JoplinNote:
    """Fetch the note identified by ``guid`` from Joplin.

    Raises:
        ValueError: if the response payload carries a truthy ``error`` entry.
    """
    params = self._build_query(fields=JoplinNote.fields_str())
    payload = self.client.get(f'/notes/{guid}', params=params).json()
    logger.info(f'从 Joplin 获取 note {guid}: {payload}')

    if not payload.get('error'):
        return JoplinNote(**payload)
    raise ValueError(payload['error'])
def post_folder(self, **kwargs) -> JoplinFolder:
    """Create a new folder in Joplin; ``kwargs`` is sent as the JSON body.

    Raises:
        ValueError: if the response payload carries a truthy ``error`` entry
            (the error is logged first).
    """
    params = self._build_query()
    logger.info(f'向 Joplin 增加 folder {kwargs}')
    payload = self.client.post('/folders', params=params, json=kwargs).json()

    if not payload.get('error'):
        return JoplinFolder(**payload)
    logger.error(payload['error'])
    raise ValueError(payload['error'])
def post_tag(self, **kwargs) -> JoplinTag:
    """Create a new tag in Joplin; ``kwargs`` is sent as the JSON body.

    Raises:
        ValueError: if the response payload carries a truthy ``error`` entry
            (the error is logged first).
    """
    params = self._build_query()
    logger.info(f'向 Joplin 增加 tag {kwargs}')
    response = self.client.post('/tags', params=params, json=kwargs)
    payload = response.json()

    failure = payload.get('error')
    if failure:
        logger.error(payload['error'])
        raise ValueError(payload['error'])
    return JoplinTag(**payload)
def get_tag(self, guid: str) -> JoplinTag:
    """Fetch the tag identified by ``guid`` from Joplin.

    Raises:
        ValueError: if the response payload carries a truthy ``error`` entry
            (the error is logged first).
    """
    params = self._build_query(fields=JoplinTag.fields_str())
    payload = self.client.get(f'/tags/{guid}', params=params).json()
    logger.info(f'从 Joplin 获取 tag {guid}: {payload}')

    if not payload.get('error'):
        return JoplinTag(**payload)
    logger.error(payload['error'])
    raise ValueError(payload['error'])
def post_note(self, id: str, title: str, body: str, is_markdown: bool,
              parent_id: str, source_url: str) -> JoplinNote:
    """Create a new note in Joplin.

    Markdown bodies are sent as ``body``. Anything else is sent as
    ``body_html`` and Joplin is asked to convert it to Markdown.

    Hidden Joplin parameters, found by capturing Joplin WebClipper traffic:

    complete Page Html
        source_command = {'name': 'completePageHtml', 'preProcessFor': 'html'}
        convert_to = html
    simplified Page Html
        source_command = {'name': 'simplifiedPageHtml'}
        convert_to = markdown
    complete page, markdown
        source_command = {'name': 'completePageHtml', 'preProcessFor': 'markdown'}
        convert_to = markdown

    Raises:
        ValueError: if the response payload carries a truthy ``error`` entry
            (the error is logged first).
    """
    payload = {
        'id': id,
        'title': title,
        'parent_id': parent_id,
        'markup_language': 1,
    }
    if source_url:
        payload['source_url'] = source_url

    if not is_markdown:
        # Let Joplin convert all of the HTML into Markdown.
        payload.update(
            body_html=body,
            convert_to='markdown',
            source_command={
                'name': 'simplifiedPageHtml',
            },
        )
    else:
        payload['body'] = body

    params = self._build_query()
    logger.info(f'向 Joplin 增加 note {payload}')
    result = self.client.post('/notes', params=params, json=payload).json()

    if not result.get('error'):
        return JoplinNote(**result)
    logger.error(result['error'])
    raise ValueError(result['error'])
def get_tags(self, guid: str) -> dict[str, JoplinTag]:
    """Return every tag attached to the note ``guid``, keyed by tag id.

    The ``note_tag`` table is queried for the note's tag ids and each id is
    resolved through the in-memory ``self.tags`` mapping.

    Args:
        guid: id of the note whose tags are wanted.

    Returns:
        A dict mapping tag id to the matching entry of ``self.tags``; empty
        when the note has no rows in ``note_tag``.

    Raises:
        KeyError: if ``note_tag`` references a tag id that is not present in
            ``self.tags``.
    """
    sql = 'SELECT tag_id, title FROM note_tag WHERE note_id=?;'
    items = self.conn.execute(sql, (guid, )).fetchall()
    logger.info(f'在数据库 note_tag 中找到 note {guid} 的 {len(items)} 条 tag 记录。')

    # Column 0 is tag_id and column 1 is title. The lookup must use the id:
    # indexing self.tags with the title (column 1) fails or returns the wrong tag.
    return {tag_id: self.tags[tag_id] for tag_id, _title in items}
def get_internal_links(self, guid: str) -> dict[str, JoplinInternalLink]:
    """Return the internal links stored for note ``guid``.

    The result is keyed by ``'<note_id>-<resource_id>'``. An object already
    present in ``self.internal_links`` under that key is reused; otherwise
    the link built from the database row is used.
    """
    rows = self.conn.execute(
        'SELECT note_id, resource_id, title, link_type FROM internal_link WHERE note_id=?;',
        (guid, ),
    ).fetchall()
    logger.info(
        f'在数据库 internal_link 中找到 note {guid} 的 {len(rows)} 条内链记录。')

    found = {}
    for row in rows:
        link_key = f'{row[0]}-{row[1]}'
        # Prefer the cached object; the row-built link is only the fallback.
        from_row = JoplinInternalLink(*row)
        found[link_key] = self.internal_links.get(link_key, from_row)
    return found
def init_db(self):
    """Open the SQLite database at ``self.db_file`` and create any missing table.

    Each expected table is looked up in ``sqlite_master``; if it is absent,
    its script from ``self.CREATE_SQL`` is executed.
    """
    self.conn = sqlite3.connect(self.db_file)
    probe = "SELECT count(*) FROM sqlite_master WHERE type='table' AND name=?;"
    expected_tables = ('l2f', 'note', 'resource', 'internal_link', 'tag',
                       'note_tag')

    for name in expected_tables:
        (present, ) = self.conn.execute(probe, (name, )).fetchone()
        logger.info(f'表 {name} 是否存在: {present}')
        if present:
            continue
        self.conn.executescript(self.CREATE_SQL[name])
def post_resource(self, file: Path, resource_type: int,
                  **kwargs) -> JoplinResource:
    """Upload ``file`` to Joplin as a new resource.

    Args:
        file: path of the file to upload; it is opened in binary mode.
        resource_type: passed through unchanged to the returned
            ``JoplinResource``.
        **kwargs: serialised to JSON and sent as the ``props`` form field.
            According to the original author's testing, only ``title`` and
            ``id`` have any effect; other properties are ignored.

    Returns:
        A ``JoplinResource`` built from the response payload plus
        ``resource_type``.

    Raises:
        ValueError: if the response payload carries a truthy ``error`` entry
            (the error is logged first).
        OSError: if ``file`` cannot be opened.
    """
    query = self._build_query()
    # The context manager guarantees the handle is closed even when the
    # request raises; previously the file object was never closed.
    with open(file, 'rb') as fp:
        form = {'props': json.dumps(kwargs)}
        logger.info(f'向 Joplin 增加 resource {file} {kwargs}')
        resp = self.client.post('/resources',
                                params=query,
                                files={'data': fp},
                                data=form)

    result = resp.json()
    if result.get('error'):
        logger.error(result['error'])
        raise ValueError(result['error'])
    return JoplinResource(**result, resource_type=resource_type)
def convert_l2f(self, documents: list[WizDocument]) -> None:
    """Convert all WizNote locations to the intermediate form used to create Joplin folders.

    ``self.l2f_cache`` is first rebuilt from the ``l2f`` table, then
    ``build_location_to_top`` is called for every document's location.
    """
    rows = self.conn.execute(
        'SELECT location, title, parent_location, level, id, parent_id FROM l2f;'
    ).fetchall()
    logger.info(f'在数据库 l2f 中找到 {len(rows)} 条记录。')

    # The location (first column) is the unique key of the cache.
    self.l2f_cache = {}
    for row in rows:
        entry = Location2Folder(*row)
        self.l2f_cache[row[0]] = entry

    for doc in documents:
        self.build_location_to_top(doc.location, doc)
def sync_note_by_location(self,
                          location: str,
                          with_children: bool = True) -> None:
    """Sync every note found in the given WizNote directory.

    Folders and tags are synced first. When ``with_children`` is true,
    ``_get_locations`` is asked to add further locations to the list that
    starts with ``location``; documents whose location is in that list are
    then synced one by one.
    """
    self.sync_folders()
    self.sync_tags()

    targets = [location]
    if with_children:
        self._get_locations(location, targets)
    logger.info(f'处理以下 location: {targets}')

    pending = []
    for wd in self.ws.documents:
        if wd.location in targets:
            pending.append(wd)
    logger.info(f'为知笔记目录 {location} 中有 {len(pending)} 篇笔记等待同步。')

    for wd in pending:
        self._sync_note(wd)
def load_notes(self) -> None:
    """Load the already-synced notes from the ``note`` table into ``self.notes``.

    Every note also gets its folder (looked up in ``self.folders`` by
    ``parent_id``), its internal links, its resources and its tags attached.
    """
    rows = self.conn.execute(
        'SELECT note_id, title, joplin_folder, markup_language, wiz_location FROM note;'
    ).fetchall()
    logger.info(f'在数据库 note 中找到 {len(rows)} 条记录。')

    self.notes = {}
    for note_id, title, joplin_folder, markup_language, wiz_location in rows:
        note = JoplinNote(note_id,
                          title,
                          joplin_folder,
                          markup_language,
                          location=wiz_location)
        note.folder = self.folders[note.parent_id]
        note.internal_links = self.get_internal_links(note.id)
        # Resources are derived from the links attached just above.
        note.resources = self.get_resources(note.internal_links)
        note.tags = self.get_tags(note.id)
        self.notes[note.id] = note
def sync_tags(self) -> None:
    """Sync WizNote tags to Joplin tags.

    Only tags whose Joplin id is not yet a key of ``self.cu.tags`` are
    created. A ``ValueError`` raised while creating a tag is logged and does
    not stop the loop.
    """
    known_ids = self.cu.tags.keys()
    pending = [
        wt for wt in self.ws.tags if tojoplinid(wt.guid) not in known_ids
    ]
    logger.info(f'为知笔记共有 {len(self.ws.tags)} 个 tag 。')
    logger.info(f'有 {len(pending)} 个 tag 等待同步。')

    for wt in pending:
        tag_id = tojoplinid(wt.guid)
        try:
            logger.info(f'处理 tag {wt.name} {tag_id}')
            created = self.jda.post_tag(id=tag_id,
                                        title=wt.name,
                                        created_time=wt.modified,
                                        updated_time=wt.modified)
            self.cu.add_tag(created)
        except ValueError as err:
            logger.error(err)
            # A duplicate-guid error means the tag was created in Joplin but
            # never written to the conversion database; record it there now.
            if 'SQLITE_CONSTRAINT: UNIQUE constraint failed' in str(err):
                self.cu.add_tag(self.jda.get_tag(tag_id))
def _sync_note(self, document: WizDocument) -> JoplinNote:
    """Sync a single WizNote document to Joplin.

    Attachments and images are uploaded as Joplin resources, the body is
    converted with ``convert_joplin_body``, the note is created through
    ``self.jda.post_note`` and finally recorded with ``self.cu.add_note``.

    Args:
        document: the WizNote document to sync.

    Returns:
        The newly created note, or ``None`` when ``self.cu.get_note`` already
        knows a note with this id (a warning is logged and nothing is
        uploaded).
    """
    logger.info(f'正在处理 document {document.guid}|{document.title}|。')
    note_id = tojoplinid(document.guid)
    jn: JoplinNote = self.cu.get_note(note_id)
    if jn is not None:
        logger.warning(f'note {jn.id} |{jn.title}| 已经存在!')
        return None

    # Joplin resources created for this note's images and attachments.
    resources_in_note: dict[str, JoplinResource] = {}

    # WizNote images are never part of the internal links and attachments may
    # not be either, so all of them are added to the internal links here.
    # An attachment already present in the links still needs a link appended
    # at the end of the body.
    joplin_internal_links: dict[str, JoplinInternalLink] = {}

    # Internal links already contained in the WizNote document.
    for wil in document.internal_links:
        jil = JoplinInternalLink(note_id, tojoplinid(wil.guid), wil.title,
                                 wil.link_type, wil.outerhtml)
        joplin_internal_links[jil.id] = jil

    # Upload the attachments.
    for attachment in document.attachments:
        jr: JoplinResource = self._upload_wiz_attachment(attachment)
        resources_in_note[jr.id] = jr
        jil_id = f'{note_id}-{jr.id}'
        if joplin_internal_links.get(jil_id) is not None:
            logger.warning(f'内链关系 {jil_id} 已存在!')
            continue
        # The attachment is in the attachment list but not in the body, so
        # there is no outerhtml; the conversion step appends it to the body.
        jil = JoplinInternalLink(note_id, jr.id, jr.title, 'open_attachment')
        joplin_internal_links[jil.id] = jil

    # Upload the images; each image becomes a Joplin resource.
    for image in document.images:
        jr = self._upload_wiz_image(image)
        resources_in_note[jr.id] = jr
        jil = JoplinInternalLink(note_id, jr.id, jr.title, 'image',
                                 image.outerhtml)
        joplin_internal_links[jil.id] = jil

    # Create the Joplin note and copy the WizNote document's values into it.
    body = convert_joplin_body(document.body, document.is_markdown,
                               joplin_internal_links.values())
    folder = self.cu.get_folder(location=document.location)
    note: JoplinNote = self.jda.post_note(note_id, document.title, body,
                                          document.is_markdown, folder.id,
                                          document.url)
    note.internal_links = joplin_internal_links
    # Previously the uploaded resources were collected but never attached,
    # leaving the note without ``resources`` unlike notes built by load_notes.
    # NOTE(review): confirm add_note does not re-insert these resources.
    note.resources = resources_in_note
    note.folder = folder
    note.tags = self.cu.get_tags(note.id)
    self.cu.add_note(note)
    return note