class RequestService:
    """Book look-up service combining ``RequestAPI`` with a database handler.

    The handler is obtained from ``dbfry.getInterface('mongodb', configs.db)``
    and documents are stored in / read from the ``douban_book`` collection.
    """

    def __init__(self):
        self.dbHandler = dbfry.getInterface('mongodb', configs.db)
        self.requestAPI = RequestAPI()
        self.collection = 'douban_book'

    def search_books(self, keyword, tag='', offset=0, limit=1):
        """Search books through the API and store every hit in the database.

        Storing is best-effort: a database failure is logged and the API
        result is still returned to the caller.

        Returns:
            Whatever ``requestAPI.search_books`` returned, unchanged.
        """
        _res = self.requestAPI.search_books(keyword, tag, offset, limit)
        # `in` replaces dict.has_key(), which no longer exists on Python 3.
        # The None guard keeps an empty API answer from raising here.
        if _res is not None and 'books' in _res:
            try:
                self.dbHandler.connect()
                for _book in _res['books']:
                    self.dbHandler.insert(self.collection, _book)
            except Exception:
                # Exception (not BaseException) so Ctrl-C / SystemExit still
                # propagate; logging.exception attaches the traceback itself.
                logging.exception('error save search books')
            finally:
                self.dbHandler.disconnect()
        return _res

    def search_book_by_isbn(self, isbn):
        """Return the book stored under ``isbn13``; fall back to the API.

        ``isbn`` is converted with ``str()`` before use, e.g. '9787532706907'.
        A database failure is logged and treated like a miss.
        """
        _book = None
        try:
            self.dbHandler.connect()
            _book = self.dbHandler.find_one(self.collection,
                                            {'isbn13': str(isbn)})
        except Exception:
            logging.exception('error search book by isbn')
        finally:
            self.dbHandler.disconnect()
        if _book is None:
            _book = self.requestAPI.get_book_by_isbn(str(isbn))
        return _book

    def get_book_reviews(self, id, offset=0, limit=5, orderby_time=False):
        """Return the API's reviews for book ``id``, or ``{}`` when it gives None."""
        _reviews = self.requestAPI.get_book_reviews(
            id, offset=offset, limit=limit, orderby_time=orderby_time)
        if _reviews is None:
            return {}
        return _reviews

    def get_ratings(self, id):
        """Return ``requestAPI.get_ratings(id)`` unchanged."""
        return self.requestAPI.get_ratings(id)
class RequestService:
    """Book look-up service combining ``RequestAPI`` with a database handler.

    The handler comes from ``dbfry.getInterface('mongodb', configs.db)``;
    books are written to and read from the ``douban_book`` collection.
    """

    def __init__(self):
        self.dbHandler = dbfry.getInterface('mongodb', configs.db)
        self.requestAPI = RequestAPI()
        self.collection = 'douban_book'

    def search_books(self, keyword, tag='', offset=0, limit=1):
        """Query the API for books and persist each returned book.

        Persistence is best-effort: database errors are logged and the API
        response is returned regardless.

        Returns:
            The value returned by ``requestAPI.search_books``, unmodified.
        """
        _res = self.requestAPI.search_books(keyword, tag, offset, limit)
        # dict.has_key() is gone in Python 3; `in` works on both versions.
        # A None response is passed straight back instead of raising.
        if _res is not None and 'books' in _res:
            try:
                self.dbHandler.connect()
                for _book in _res['books']:
                    self.dbHandler.insert(self.collection, _book)
            except Exception:
                # Narrowed from BaseException so interpreter-exit signals are
                # not swallowed; the traceback is attached by logging.exception.
                logging.exception('error save search books')
            finally:
                self.dbHandler.disconnect()
        return _res

    def search_book_by_isbn(self, isbn):
        """Look the book up by ``isbn13`` in the database, then via the API.

        ``isbn`` is stringified first (example value: '9787532706907').
        Database errors are logged and handled as a cache miss.
        """
        _book = None
        try:
            self.dbHandler.connect()
            _book = self.dbHandler.find_one(self.collection,
                                            {'isbn13': str(isbn)})
        except Exception:
            logging.exception('error search book by isbn')
        finally:
            self.dbHandler.disconnect()
        if _book is None:
            _book = self.requestAPI.get_book_by_isbn(str(isbn))
        return _book

    def get_book_reviews(self, id, offset=0, limit=5, orderby_time=False):
        """Return reviews for book ``id``; ``{}`` when the API returns None."""
        _reviews = self.requestAPI.get_book_reviews(
            id, offset=offset, limit=limit, orderby_time=orderby_time)
        return {} if _reviews is None else _reviews

    def get_ratings(self, id):
        """Pass through the result of ``requestAPI.get_ratings(id)``."""
        return self.requestAPI.get_ratings(id)
class RequestService:
    """Extracts comment entries from the markup returned by ``RequestAPI``."""

    def __init__(self):
        self.requestAPI = RequestAPI()

    def search_comment(self, keyword):
        """Fetch the comment page for ``keyword`` and scrape its entries.

        Returns:
            A list with one dict per ``all_list_node`` block matched, with the
            keys ``img``, ``user``, ``comment``, ``time`` and ``num``;
            ``num`` is itself a dict with the single key ``zhan``.
        """
        _x = self.requestAPI.search_comment(keyword)
        # Raw string: same pattern text, but `\s` is no longer an invalid
        # string escape. Capture groups, in order:
        # img data-src, user, comment, time, zhan count.
        _regex = r'<div class="all_list_node">\s+<div[^>]+>\s+<a[^>]+>\s+<img.*?data-src="([^"]+)"[^>]+>\s+</a>\s+</div>\s+<div[^>]+>\s+<div[^>]+>\s+<a[^>]+>([^<]+)</a><span[^<]+</span>\s+<div[^>]+>(.*?)</div>\s+</div>\s+<div[^>]+>\s+<font[^>]+>([^<]+)</font>\s+<span[^>]+>(.*?)</span>\s+</div>'
        return [
            {
                'img': _node[0],
                'user': _node[1],
                'comment': _node[2],
                'time': _node[3],
                'num': {'zhan': _node[4]},
            }
            for _node in re.findall(_regex, _x)
        ]

    def _parse_comment_node(self, node):
        """Collect the avatar URL from the ``node_head`` child of ``node``.

        ``node`` is iterated and its children are queried with ``attrib`` and
        ``find`` (presumably ElementTree-style elements; confirm with callers).

        Returns:
            ``{'img': <data-src value>}`` when a ``node_head`` child contains
            ``a/img`` with a ``data-src`` attribute, otherwise ``{}``.
        """
        _rs = {}
        for _n in node:
            # Children with any other class (including 'node_content_all')
            # contribute nothing yet.
            if _n.attrib.get('class') != 'node_head':
                continue
            _link = _n.find('a')
            _img = _link.find('img') if _link is not None else None
            # Compare against None: an element without children is falsy, so
            # a plain truth test would always skip a found <img>.
            if _img is not None and 'data-src' in _img.attrib:
                _rs['img'] = _img.attrib['data-src']
        return _rs
def __init__(self):
    """Set up the database handler, the API client and the collection name.

    The handler is whatever ``dbfry.getInterface('mongodb', configs.db)``
    returns; the collection name is fixed to ``'douban_book'``.
    """
    self.dbHandler = dbfry.getInterface('mongodb', configs.db)
    self.requestAPI = RequestAPI()
    self.collection = 'douban_book'
def __init__(self):
    """Create the ``RequestAPI`` client this service delegates to."""
    self.requestAPI = RequestAPI()
class RequestService:
    """Scrapes feed entries out of the page text returned by ``RequestAPI``."""

    def __init__(self):
        self.requestAPI = RequestAPI()

    def search_comment(self, keyword):
        """Fetch the search page for ``keyword`` and extract its feed entries.

        The page is expected to embed the feed as the ``"html"`` value of a
        ``STK.pageletM.view({"pid":"pl_wb_feedlist", ...})`` call; the markup
        inside is still backslash-escaped, which is why the node pattern
        matches literal ``\\"`` and ``<\\/tag>`` sequences.

        Returns:
            A list of dicts with the keys ``img``, ``user``, ``comment``,
            ``num``, ``time`` and ``from``. ``num`` maps ``praises``,
            ``retweets`` and ``reviews`` to the captured digit strings
            (possibly ``''``), or to integer ``0`` when neither count pattern
            matches.
        """
        _x = self.requestAPI.search_comment(keyword)
        # Raw string: identical pattern text, without the invalid `\(` escape.
        _regex = r'STK && STK.pageletM && STK.pageletM.view\({"pid":"pl_wb_feedlist",.*?"html":([^}]+)}\)'
        # Groups: img src, user, comment text, counts span, time, source.
        _node_regex = '<dl[^>]+>.*?<dt[^>]+>.*?<a[^>]+>.*?<img.*?src=\\\\"([^"]+)"[^>]+>.*?<\\\\/a>.*?<\\\\/dt>.*?<dd[^>]+>.*?<p[^>]+>.*?<a[^>]+>(.*?)<a[^>]+>.*?<\\\\/a>.*?<\\\\/a>.*?<em>(.*?)<\\\\/em>.*?<\\\\/p>.*?<ul[^>]+>.*?<\\\\/ul>.*?<dl[^>]+>.*?<\\\\/dl>.*?<p[^>]+>.*?<span>(.*?)<\\\\/span>.*?<a[^>]+>(.*?)<\\\\/a>.*?<a[^>]+>(.*?)<\\\\/a>'
        # The counts follow the escaped code points for U+8D5E,
        # U+8F6C U+53D1 and U+8BC4 U+8BBA. Two variants are tried: singly
        # and doubly backslash-escaped text.
        _num_regex = [
            r'\\u8d5e<\\/em>\(?(\d*)\)?.*\\u8f6c\\u53d1\(?(\d*)\)?.*\\u8bc4\\u8bba\(?(\d*)\)?',
            r'\\\\u8d5e<\\\\/em>\(?(\d*)\)?.*\\\\u8f6c\\\\u53d1\(?(\d*)\)?.*\\\\u8bc4\\\\u8bba\(?(\d*)\)?',
        ]
        _num_keys = ('praises', 'retweets', 'reviews')
        _ret = []
        for _node in re.findall(_regex, _x):
            for _nd in re.findall(_node_regex, _node):
                m = re.search(_num_regex[0], _nd[3])
                if m is None:
                    m = re.search(_num_regex[1], _nd[3])
                if m is not None:
                    _num_list = [m.group(1), m.group(2), m.group(3)]
                else:
                    _num_list = [0, 0, 0]
                _ret.append({
                    'img': _nd[0],
                    'user': _nd[1],
                    'comment': _nd[2],
                    'num': dict(zip(_num_keys, _num_list)),
                    'time': _nd[4],
                    'from': _nd[5],
                })
        return _ret

    def _parse_comment_node(self, node):
        """Collect the avatar URL from the ``node_head`` child of ``node``.

        ``node`` is iterated and its children are queried with ``attrib`` and
        ``find`` (presumably ElementTree-style elements; confirm with callers).

        Returns:
            ``{'img': <data-src value>}`` when a ``node_head`` child contains
            ``a/img`` with a ``data-src`` attribute, otherwise ``{}``.
        """
        _rs = {}
        for _n in node:
            # Children with any other class (including 'node_content_all')
            # contribute nothing yet.
            if _n.attrib.get('class') != 'node_head':
                continue
            _link = _n.find('a')
            _img = _link.find('img') if _link is not None else None
            # Compare against None: an element without children is falsy, so
            # a plain truth test would always skip a found <img>.
            if _img is not None and 'data-src' in _img.attrib:
                _rs['img'] = _img.attrib['data-src']
        return _rs
class RequestService:
    """Scrapes feed entries out of the page text returned by ``RequestAPI``."""

    def __init__(self):
        self.requestAPI = RequestAPI()

    def search_comment(self, keyword):
        """Fetch the search page for ``keyword`` and extract its feed entries.

        The feed is read from the ``"html"`` value of the embedded
        ``STK.pageletM.view({"pid":"pl_wb_feedlist", ...})`` call. That markup
        is still backslash-escaped, so the node pattern matches literal
        ``\\"`` and ``<\\/tag>`` sequences.

        Returns:
            A list of dicts with the keys ``img``, ``user``, ``comment``,
            ``num``, ``time`` and ``from``. ``num`` maps ``praises``,
            ``retweets`` and ``reviews`` to the captured digit strings
            (possibly ``''``), or to integer ``0`` when neither count pattern
            matches.
        """
        _x = self.requestAPI.search_comment(keyword)
        # Raw string: identical pattern text, without the invalid `\(` escape.
        _regex = r'STK && STK.pageletM && STK.pageletM.view\({"pid":"pl_wb_feedlist",.*?"html":([^}]+)}\)'
        # Groups: img src, user, comment text, counts span, time, source.
        _node_regex = '<dl[^>]+>.*?<dt[^>]+>.*?<a[^>]+>.*?<img.*?src=\\\\"([^"]+)"[^>]+>.*?<\\\\/a>.*?<\\\\/dt>.*?<dd[^>]+>.*?<p[^>]+>.*?<a[^>]+>(.*?)<a[^>]+>.*?<\\\\/a>.*?<\\\\/a>.*?<em>(.*?)<\\\\/em>.*?<\\\\/p>.*?<ul[^>]+>.*?<\\\\/ul>.*?<dl[^>]+>.*?<\\\\/dl>.*?<p[^>]+>.*?<span>(.*?)<\\\\/span>.*?<a[^>]+>(.*?)<\\\\/a>.*?<a[^>]+>(.*?)<\\\\/a>'
        # Counts follow the escaped code points U+8D5E, U+8F6C U+53D1 and
        # U+8BC4 U+8BBA; the second pattern handles doubly escaped text.
        _num_regex = [
            r'\\u8d5e<\\/em>\(?(\d*)\)?.*\\u8f6c\\u53d1\(?(\d*)\)?.*\\u8bc4\\u8bba\(?(\d*)\)?',
            r'\\\\u8d5e<\\\\/em>\(?(\d*)\)?.*\\\\u8f6c\\\\u53d1\(?(\d*)\)?.*\\\\u8bc4\\\\u8bba\(?(\d*)\)?',
        ]
        _num_keys = ('praises', 'retweets', 'reviews')
        _ret = []
        for _node in re.findall(_regex, _x):
            for _nd in re.findall(_node_regex, _node):
                m = re.search(_num_regex[0], _nd[3])
                if m is None:
                    m = re.search(_num_regex[1], _nd[3])
                if m is not None:
                    _num_list = [m.group(1), m.group(2), m.group(3)]
                else:
                    _num_list = [0, 0, 0]
                _ret.append({
                    'img': _nd[0],
                    'user': _nd[1],
                    'comment': _nd[2],
                    'num': dict(zip(_num_keys, _num_list)),
                    'time': _nd[4],
                    'from': _nd[5],
                })
        return _ret

    def _parse_comment_node(self, node):
        """Pick the avatar URL out of the ``node_head`` child of ``node``.

        ``node`` is iterated and each child is inspected through ``attrib``
        and ``find`` (presumably ElementTree-style elements; confirm with
        callers).

        Returns:
            ``{'img': <data-src value>}`` when a ``node_head`` child holds
            ``a/img`` carrying ``data-src``; ``{}`` otherwise.
        """
        _rs = {}
        for _n in node:
            # Any other class, 'node_content_all' included, is ignored for now.
            if _n.attrib.get('class') != 'node_head':
                continue
            _link = _n.find('a')
            _img = _link.find('img') if _link is not None else None
            # Explicit None test: a childless element evaluates as False, so
            # `if _img:` never fired for a real <img>.
            if _img is not None and 'data-src' in _img.attrib:
                _rs['img'] = _img.attrib['data-src']
        return _rs