def insert_info(info):
    """Store one scraped book record in the database.

    Three groups of ``REPLACE INTO`` statements are issued, in this order:
    one `bookwriter` row per comma-separated entry of ``info['author']``,
    one `bookinfo` row for the book itself, and one `bookrelated` row per
    entry of ``info['relations']``.

    :param info: mapping providing the keys ``author``, ``name``,
        ``publisher``, ``description``, ``image``, ``isbn``, ``pages``,
        ``year``, ``language``, ``bookformat``, ``download``, ``url``,
        ``bookid`` and ``relations``.
    """
    writer_sql = """
        REPLACE INTO `bookwriter` (`writer`, `book`)
        VALUES (%s, %s)"""
    book_sql = """
        REPLACE INTO `bookinfo` (`name`, `publisher`, `description`, `image`,
        `isbn`, `pages`, `year`, `language`, `fileformat`, `download`, `url`,
        `bookid`)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
    related_sql = """
        REPLACE INTO `bookrelated` (`origin`, `related`)
        VALUES (%s, %s)"""
    # Keys of ``info`` in the same order as the `bookinfo` column list above;
    # note that the `fileformat` column is fed from the ``bookformat`` key.
    book_keys = ('name', 'publisher', 'description', 'image', 'isbn',
                 'pages', 'year', 'language', 'bookformat', 'download',
                 'url', 'bookid')

    with closing(db_pool.connection()) as db:
        # Authors.
        # NOTE(review): names are not stripped, so "A, B" stores " B" with a
        # leading space -- confirm whether that is intended.
        for writer in info['author'].split(','):
            db.execute(writer_sql, writer, info['name'])

        # The book itself.
        db.execute(book_sql, *[info[key] for key in book_keys])

        # Related books.
        for related in info['relations']:
            db.execute(related_sql, info['name'], related)
def get_untrack_books(max_id):
    """Return the ids in ``range(max_id)`` that have no row in `detail`.

    :param max_id: exclusive upper bound of the id range to check.
    :returns: list of missing ids in ascending order.
    """
    with closing(db_pool.connection()) as db:
        rows = db.query("""
            SELECT `bookid` FROM `detail`""")
    known_ids = set(int(row['bookid']) for row in rows)
    # Sorted so the result order does not depend on set iteration order.
    id_target = sorted(set(range(max_id)) - known_ids)
    # Single-argument call form: emits the same text as the former print
    # statement and is valid syntax on both Python 2 and Python 3.
    print("length of target is  %d" % len(id_target))
    return id_target
def select_book_isbn():
    """Fetch the ISBNs of books not yet looked up on Douban.

    :returns: the rows of `detail` whose `isbn10` is NULL, each exposing the
        `isbn` column, exactly as returned by ``db.query``.
    """
    pending_sql = """SELECT `isbn` FROM `detail` WHERE `isbn10` IS NULL"""
    with closing(db_pool.connection()) as db:
        return db.query(pending_sql)
def _normalize_pubdate(pubdate):
    """Return ``pubdate`` unchanged if it has three numeric parts, else
    ``"<year>-1-1"``, or None when no four-digit year can be found."""
    if not pubdate:
        return None
    if len(re.findall(r'(\d+)', pubdate)) == 3:
        return pubdate
    year = re.search(r'(\d{4})', pubdate)
    if year is None:
        # Nothing usable: store NULL instead of raising IndexError.
        return None
    return year.group(1) + "-1-1"


def insert(info, isbn):
    """Store the details returned by the book API for one ISBN.

    Updates the `detail` row whose `isbn` equals ``isbn`` with the ISBN-10,
    ISBN-13, price, publication date, rating figures, Douban links and
    summary taken from ``info``, then passes the tags to ``insert_tags``.

    :param info: decoded API entry (a mapping); the keys read are ``title``,
        ``summary``, ``gd:rating``, ``link``, ``db:attribute`` and ``db:tag``.
    :param isbn: ISBN identifying the `detail` row to update.
    :returns: ``'Title not Found'`` when ``info`` has no title, otherwise
        None.
    """
    title_node = info.get('title')
    if not title_node:
        return 'Title not Found'
    bookname = title_node.get('$t')

    summary = ''
    if info.get('summary'):
        summary = info.get('summary').get('$t')

    r_average, r_max, r_min, r_count = get_rating(info.get('gd:rating'))
    api, url = get_douban_url(info.get('link'))
    # Page count and publisher are returned by get_book but not stored here.
    isbn10, isbn13, _pages, _publisher, price, pubdate = get_book(
        info.get('db:attribute'))
    # Partial dates (e.g. year only) are padded to January 1st of that year;
    # a missing or year-less value becomes NULL instead of crashing.
    pubdate = _normalize_pubdate(pubdate)

    with closing(db_pool.connection()) as db:
        db.execute("""
            UPDATE `detail` SET `isbn10`=%s, `isbn13`=%s, `price`=%s,
            `pubdate`=%s, `rateAVE`=%s, `rateMAX`=%s, `rateMIN`=%s,
            `rateCount`=%s, `doubanAPI`=%s, `doubanURL`=%s, `summary`=%s
            WHERE `isbn`=%s
            """, isbn10, isbn13, price, pubdate, r_average, r_max, r_min,
                   r_count, api, url, summary, isbn)
    insert_tags(info.get('db:tag'), bookname, isbn)
def save_image_url_to_mysql(realimage, bookid):
    """Record the real cover-image URL for a book in `bookinfo`.

    The row is located by looking for ``/<bookid>/`` inside its `url` column.

    :param realimage: value to store in `realimage`.
    :param bookid: book id expected to appear as a path segment of `url`.
    :returns: always True.
    """
    # Bind the whole LIKE pattern as one parameter. A placeholder embedded
    # inside a quoted SQL literal gets quoted a second time by the driver
    # when ``bookid`` is a string, which corrupts the statement.
    url_pattern = '%%/%s/%%' % (bookid,)
    with closing(db_pool.connection()) as db:
        db.execute("""
            UPDATE `bookinfo` SET `realimage` = %s
            WHERE `url` LIKE %s""", realimage, url_pattern)
    return True
def save_book_size(bookid, size):
    """Record the file size of a book.

    :param bookid: `bookid` of the `detail` row to update.
    :param size: file size; it is converted with ``str`` before being stored
        in `filesize`.
    """
    size_text = str(size)
    update_sql = """
        UPDATE `detail` SET `filesize` = %s
        WHERE `bookid` = %s
        """
    with closing(db_pool.connection()) as db:
        db.execute(update_sql, size_text, bookid)
def books_without_realimage():
    """Pick up to ten book ids whose `realimage` is still NULL.

    :returns: list of `bookid` values from `bookinfo` (at most ten).
    """
    with closing(db_pool.connection()) as db:
        rows = db.query("""
            SELECT `bookid` FROM `bookinfo`
            WHERE `realimage` IS NULL
            LIMIT 10""")
    found = []
    for row in rows:
        found.append(row['bookid'])
    return found
def get_book_url(bookid):
    """Look up the download address and page address of a book.

    :param bookid: `bookid` of the `bookinfo` row.
    :returns: ``(download, url)`` when the lookup yields a row, otherwise
        None.
    """
    lookup_sql = """
        SELECT `download`, `url` FROM `bookinfo`
        WHERE `bookid` = %s
        """
    with closing(db_pool.connection()) as db:
        row = db.get(lookup_sql, bookid)
    if not row:
        return None
    return row['download'], row['url']
def get_img_url():
    """Fetch the cover-image source of every book whose image is not stored.

    :returns: the rows of `detail` whose `imageURL` is NULL, each exposing
        `ebookImage` and `id`, as returned by ``db.query``.
    """
    with closing(db_pool.connection()) as db:
        img_raw = db.query("""
            SELECT `ebookImage`, `id` FROM `detail`
            WHERE `imageURL` IS NULL""")
    # Single-argument call form: prints the same text as the former print
    # statement and parses on both Python 2 and Python 3.
    print('取到 %d 条数据' % len(img_raw))
    return img_raw
def get_book_url(bookid):
    """Look up the download address and page address of a book.

    :param bookid: `bookid` of the `detail` row.
    :returns: ``(downloadURL, ebookURL)`` when the lookup yields a row,
        otherwise None.
    """
    # Single-argument call form: prints the same text as the former print
    # statement and parses on both Python 2 and Python 3.
    print("bookid %s" % (bookid,))
    with closing(db_pool.connection()) as db:
        urls = db.get("""
            SELECT `downloadURL`, `ebookURL` FROM `detail`
            WHERE `bookid` = %s
            """, bookid)
    if urls:
        return urls['downloadURL'], urls['ebookURL']
    return None
def insert_info(info):
    """Store raw scraped entries in `jandan`.

    :param info: iterable of sequences; each one is unpacked into the three
        parameters (`item`, `content`, `votes`) of a ``REPLACE INTO``.
    :returns: the string ``'success'``.
    """
    replace_sql = """REPLACE INTO `jandan` (`item`, `content`, `votes`)
                      VALUES (%s, %s, %s)"""
    with closing(db_pool.connection()) as db:
        for record in info:
            db.execute(replace_sql, *record)
    return 'success'
def get_img_url(bookid):
    """Return the cover-image URL and a file name for one book.

    The `bookinfo` row is located by looking for ``/<bookid>/`` inside its
    `url` column.

    :param bookid: book id expected to appear as a path segment of `url`.
    :returns: ``(config.ITEBOOKS_URL + image, name + '.jpg')``, or None when
        no row matches or the row has no usable image or name.
    """
    # Bind the whole LIKE pattern as one parameter. A placeholder embedded
    # inside a quoted SQL literal gets quoted a second time by the driver
    # when ``bookid`` is a string, which corrupts the statement.
    url_pattern = '%%/%s/%%' % (bookid,)
    with closing(db_pool.connection()) as db:
        img_raw = db.get("""
            SELECT `image`, `name` FROM `bookinfo`
            WHERE `url` LIKE %s
            """, url_pattern)
    if not img_raw:
        # No matching row: previously crashed with AttributeError on None.
        return None
    img = img_raw.get('image')
    name = img_raw.get('name')
    if img and name is not None:
        return config.ITEBOOKS_URL + img, name + '.jpg'
    return None
def insert_tags(tags, bookname, isbn):
    """Store the tags of a book in `tag`.

    Entries with a missing or blank ``@name``, a falsy ``@count`` or a name
    longer than 30 characters are skipped.

    :param tags: iterable of mappings with ``@name`` and ``@count`` keys; may
        be None or empty.
    :param bookname: title stored in the `name` column.
    :param isbn: ISBN stored in the `isbn` column.
    :returns: None.
    """
    if not tags:
        return None

    rows = []
    for item in tags:
        # A missing '@name' used to raise AttributeError on None.strip().
        tag = (item.get('@name') or '').strip()
        count = item.get('@count')
        # NOTE(review): 30 presumably mirrors the width of the `tag` column.
        if not tag or not count or len(tag) > 30:
            continue
        rows.append((isbn, bookname, tag, count))

    if not rows:
        # Nothing valid to write, so do not take a connection from the pool.
        return None

    with closing(db_pool.connection()) as db:
        for row in rows:
            db.execute("""
                REPLACE INTO `tag` (`isbn`, `name`, `tag`, `count`)
                VALUES (%s, %s, %s, %s)""", *row)
    return None
def get_unparsed_content():
    """Fetch a small batch of `jandan` rows whose `p_count` is still NULL.

    :returns: ``(contents, raw_left)`` where ``contents`` holds up to two
        rows exposing `item` and `content`, and ``raw_left`` is the number of
        NULL-`p_count` rows minus the batch size (it is not clamped, so it
        can be negative).
    """
    batch_size = 2
    count_sql = """SELECT COUNT(*) AS `count` FROM `jandan`
                    WHERE `p_count` IS NULL """
    batch_sql = """SELECT `item`, `content` FROM `jandan`
                    WHERE `p_count` IS NULL LIMIT %s"""
    with closing(db_pool.connection()) as db:
        pending = db.get(count_sql)
        remaining = pending['count'] - batch_size
        batch = db.query(batch_sql, batch_size)
    return batch, remaining
def get_unparsed_vote():
    """Fetch a batch of `jandan` rows whose vote columns are still NULL.

    A row qualifies when `support` or `unsupport` is NULL.

    :returns: ``(votes, raw_left)`` where ``votes`` holds up to thirty rows
        exposing `item` and `votes`, and ``raw_left`` is the number of
        qualifying rows minus the batch size (it is not clamped, so it can
        be negative).
    """
    batch_size = 30
    count_sql = """SELECT COUNT(*) AS `count` FROM `jandan`
                    WHERE `support` IS NULL OR `unsupport` IS NULL """
    batch_sql = """SELECT `item`, `votes` FROM `jandan`
                    WHERE `support` IS NULL OR `unsupport` IS NULL LIMIT %s"""
    with closing(db_pool.connection()) as db:
        pending = db.get(count_sql)
        remaining = pending['count'] - batch_size
        batch = db.query(batch_sql, batch_size)
    return batch, remaining
def insert(info, isbn):
    """Store the details returned by the book API in `douban_bookinfo`.

    The tags are handed to ``insert_tags`` first; the remaining fields are
    then written with a single ``REPLACE INTO``.

    :param info: decoded API entry (a mapping); the keys read are ``title``,
        ``db:tag``, ``summary``, ``gd:rating``, ``link`` and
        ``db:attribute``.
    :param isbn: ISBN stored in the `isbn` column.
    :returns: ``"Title not Found"`` when ``info`` has no title, otherwise
        None.
    """
    title_node = info.get("title")
    if not title_node:
        return "Title not Found"
    bookname = title_node.get("$t")

    insert_tags(info.get("db:tag"), bookname)

    summary_node = info.get("summary")
    summary = info.get("summary").get("$t") if summary_node else ""

    r_average, r_max, r_min, r_count = get_rating(info.get("gd:rating"))
    api, url = get_douban_url(info.get("link"))
    isbn10, isbn13, pages, publisher, price, pubdate = get_book(
        info.get("db:attribute")
    )

    # Values in the same order as the column list of the statement below.
    values = (
        isbn, isbn10, isbn13, pages, price, publisher, pubdate,
        r_average, r_max, r_min, r_count, api, url, summary, bookname,
    )
    with closing(db_pool.connection()) as db:
        db.execute(
            """
            REPLACE INTO `douban_bookinfo` (`isbn`, `isbn10`, `isbn13`,
            `pages`, `price`, `publisher`, `pubdate`, `rate_average`,
            `rate_max`, `rate_min`, `rate_count`, `douban_api`, `douban_url`,
            `summary`, `bookname`)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """,
            *values
        )
def insert_votes(votes):
    """Store the support / unsupport counts of several `jandan` items.

    All items are updated by one ``UPDATE ... CASE`` statement. Values are
    bound as query parameters rather than formatted into the SQL text, so
    quotes or percent signs in the data cannot break or alter the statement.

    :param votes: sequence of mappings with the keys ``item``, ``support``
        and ``unsupport``.
    :returns: the parameterized SQL text that was executed (placeholders, not
        values), or ``''`` when ``votes`` is empty and nothing was executed.
    """
    if not votes:
        # An empty list would otherwise produce "IN ()", which is invalid.
        return ''

    when_rows = '\n'.join(['WHEN %s THEN %s'] * len(votes))
    in_list = ', '.join(['%s'] * len(votes))
    sql = (
        'UPDATE `jandan` SET `support` = CASE `item`\n' + when_rows +
        '\nEND,\n`unsupport` = CASE `item`\n' + when_rows +
        '\nEND\nWHERE `item` IN (' + in_list + ')'
    )

    # Parameters follow placeholder order: first CASE, second CASE, IN list.
    params = []
    for row in votes:
        params.extend((row['item'], row['support']))
    for row in votes:
        params.extend((row['item'], row['unsupport']))
    params.extend(row['item'] for row in votes)

    with closing(db_pool.connection()) as db:
        db.execute(sql, *params)
    return sql
def update_content(contents):
    """Store the formatted content and paragraph count of `jandan` items.

    All items are updated by one ``UPDATE ... CASE`` statement. Values are
    bound as query parameters rather than formatted into the SQL text, so
    quotes or percent signs in the content cannot break or alter the
    statement.

    :param contents: sequence of mappings with the keys ``item``,
        ``p_count`` and ``content``.
    :returns: the parameterized SQL text that was executed (placeholders, not
        values), or ``''`` when ``contents`` is empty and nothing was
        executed.
    """
    if not contents:
        # An empty list would otherwise produce "IN ()", which is invalid.
        return ''

    when_rows = '\n'.join(['WHEN %s THEN %s'] * len(contents))
    in_list = ', '.join(['%s'] * len(contents))
    sql = (
        'UPDATE `jandan` SET `p_count` = CASE `item`\n' + when_rows +
        '\nEND,\n`content` = CASE `item`\n' + when_rows +
        '\nEND\nWHERE `item` IN (' + in_list + ')'
    )

    # Parameters follow placeholder order: first CASE, second CASE, IN list.
    params = []
    for row in contents:
        params.extend((row['item'], row['p_count']))
    for row in contents:
        params.extend((row['item'], row['content']))
    params.extend(row['item'] for row in contents)

    with closing(db_pool.connection()) as db:
        db.execute(sql, *params)
    return sql
def select_unfetch_book_isbn():
    """Return the ISBN of the next book to look up on Douban.

    A row offset is kept in the key-value store under ``douban::fetch:id``.
    Each call reads the `bookinfo` row at that offset and stores the offset
    plus one; once the offset reaches the number of rows it wraps to 0.

    :returns: the `isbn` of the selected row, or None when no row is
        available (for example when `bookinfo` is empty).
    """
    # The store may hand the counter back as text -- TODO confirm; coercing
    # keeps both the comparison and the LIMIT offset numeric.
    fetch_id = int(kv.get("douban::fetch:id") or 0)

    with closing(db_pool.connection()) as db:
        book_count_row = db.get(" SELECT COUNT(*) AS `count` FROM `bookinfo`")
        book_count = book_count_row["count"]
        # Offsets are zero-based, so an offset equal to the row count is
        # already past the last row and must wrap (was ``>``, off by one).
        if fetch_id >= book_count:
            fetch_id = 0
        isbn_raw = db.get(
            """
            SELECT `bookinfo`.`isbn` FROM `bookinfo` LIMIT %s, 1""",
            fetch_id,
        )

    kv.set("douban::fetch:id", fetch_id + 1)
    if not isbn_raw:
        return None
    return isbn_raw["isbn"]
def save_image_url_to_mysql(url, bookid):
    """Record the real cover-image URL for a book in `detail`.

    :param url: value to store in `imageURL`.
    :param bookid: `id` of the `detail` row to update.
    """
    update_sql = """
        UPDATE `detail` SET `imageURL` = %s
        WHERE `id` = %s"""
    with closing(db_pool.connection()) as db:
        db.execute(update_sql, url, bookid)
def get_book_undownload():
    """List the ids of books whose file size has not been recorded.

    :returns: list of `bookid` values from `detail` rows whose `filesize`
        is NULL.
    """
    with closing(db_pool.connection()) as db:
        rows = db.query("""
            SELECT `bookid` FROM `detail`
            WHERE `filesize` IS NULL""")
    pending_ids = []
    for row in rows:
        pending_ids.append(row["bookid"])
    return pending_ids