def mark_start(cls, feed_id): """ 标记该feed开始抓取 """ with connection() as conn: sql = text('UPDATE `feed` SET `status`=0 WHERE `id`=:feed_id') conn.execute(sql, {'feed_id': feed_id})
def mark_fail(cls, feed_id): """ 记录抓取失败 """ now =get_current_dt() with connection() as conn: sql = text('UPDATE `feed` SET `collect_dt`=:collect_dt, `status`=0 WHERE `id`=:feed_id') conn.execute(sql, {'feed_id': feed_id, 'collect_dt': now})
def read(cls, entry_id): """ 标记为已读 """ with connection() as conn: sql = text("UPDATE entry " "SET `unread`=0 " "WHERE `id`=:entry_id") conn.execute(sql, {'entry_id': entry_id})
def get_subscribed(cls): """ 获取订阅中的feed Return: list: 每个元素是tuple(feed_id, url, title) """ with connection() as conn: sql = text('SELECT id, link, title FROM `feed` WHERE `subscribed`=1') result = conn.execute(sql) return [(item[0], item[1], item[2]) for item in result]
def get_unread_count_by_feed(cls, feed_id): """ 获取某个feed所有未读的entry的数量 """ with connection() as conn: sql = text( "SELECT count(1) AS `cnt` FROM entry WHERE `feed_id`=:feed_id AND `unread`=1" ) result = conn.execute(sql, {'feed_id': feed_id}).fetchone() if result: return result[0] else: return 0
def update(cls, feed_id, title, updated): """ 更新feed Args: feed_id: Feed ID title: 标题 updated: 最近更新时间 collect_dt: 最近抓取时间 """ with connection() as conn: sql = text('UPDATE feed SET `title`=:title, `updated`=:updated WHERE `id`=:feed_id') conn.execute(sql, {'feed_id': feed_id, 'title': title, 'updated': updated})
def get(cls, feed_id): """ 根据id查询 """ with connection() as conn: sql = text('SELECT id, title, link, collect_dt FROM feed WHERE `id`=:feed_id AND `subscribed`=1') result = conn.execute(sql, {'feed_id': feed_id}).first() if result: return { 'id': result[0], 'title': result[1], 'link': result[2], 'collect_dt': result[3], } else: return None
def create(cls, title, link, updated, collect_dt): """ 新建一个feed Args: title: 标题 link: 地址 updated: 最近更新时间 collect_dt: 最近抓取时间 Return: 这条feed的ID """ with connection() as conn: sql = text('INSERT feed SET `title`=:title, `link`=:link, `updated`=:updated, `collect_dt`=:collect_dt ' 'ON DUPLICATE KEY UPDATE `subscribed`=1, `collect_dt`=:collect_dt') result = conn.execute(sql, {'title': title, 'link': link, 'updated': updated, 'collect_dt': collect_dt}) return result.lastrowid
def get_unread_entries_by_feed(cls, feed_id): """ 根据feed获取未读entry的列表 """ with connection() as conn: sql = text("SELECT id, title, updated, link " "FROM entry " "WHERE `feed_id`=:feed_id AND `unread`=1 " "ORDER BY updated DESC") result = conn.execute(sql, {'feed_id': feed_id}).fetchall() if result: return [{ 'id': item[0], 'title': item[1], 'updated': item[2], 'link': item[3] } for item in result] else: return None
def update(cls, title, updated, link, hash_code, feed_id, collect_dt): """更新一条entry记录 如果entry的相关记录已存在于数据库中则更新记录,否则插入新纪录 Args: title: 标题 updated: 最后更新时间 link: 原文连接 hash_code: 哈希值,用于校验唯一性 feed_id: 该entry所属的feed的ID collect_dt: 收集时间 """ with connection() as conn: unique_id = cls._generate_unique_id() sql = text( "INSERT IGNORE entry SET `id`=:id, `title`=:title, `link`=:link, `hcode`=:hash_code, `updated`=:updated, `feed_id`=:feed_id, `collect_dt`=:collect_dt" ) result = conn.execute( sql, { "title": title, "link": link, "hash_code": hash_code, "updated": updated, "feed_id": feed_id, "collect_dt": collect_dt, "id": unique_id }) if result == 0: # 该entry已存在,看是否需要更新 sql = text( "UPDATE entry " "SET `title`=:title, `updated`=:updated, `link`=:link, `collect_dt`=:collect_dt, `hash_code`=:hash_code" "WHERE `link`=:link AND `hash_code`<>:hash_code") conn.execute( sql, { "title": title, "link": link, "hash_code": hash_code, "updated": updated, "collect_dt": collect_dt })