Exemplo n.º 1
0
class RequestService:
    """Book lookups backed by the request API and a database collection.

    Search results fetched from the request API are written to the
    ``douban_book`` collection; ISBN lookups consult that collection first
    and fall back to the request API when nothing is stored.
    """

    def __init__(self):
        self.dbHandler = dbfry.getInterface('mongodb', configs.db)
        self.requestAPI = RequestAPI()
        self.collection = 'douban_book'

    def search_books(self, keyword, tag='', offset=0, limit=1):
        """Search books through the request API and store every hit.

        Each entry under the ``'books'`` key of the API response is inserted
        into the collection. Storage is best effort: a failure is logged and
        the API response is still returned.

        :param keyword: search keyword forwarded to the API.
        :param tag: tag filter forwarded to the API.
        :param offset: result offset forwarded to the API.
        :param limit: maximum number of results forwarded to the API.
        :return: the API response, unchanged (possibly empty or None).
        """
        _res = self.requestAPI.search_books(keyword, tag, offset, limit)
        # ``in`` works on Python 2 and 3; dict.has_key() was removed in
        # Python 3. An empty/None response simply has nothing to store.
        if _res and 'books' in _res:
            try:
                self.dbHandler.connect()
                for _book in _res['books']:
                    self.dbHandler.insert(self.collection, _book)
            except Exception:
                # logging.exception() appends the active traceback itself.
                logging.exception('error save search books')
            finally:
                self.dbHandler.disconnect()
        return _res

    def search_book_by_isbn(self, isbn):
        """Return the book with the given ISBN-13.

        The collection is queried on ``isbn13`` first; when the query finds
        nothing or fails (the failure is logged), the request API is asked.

        :param isbn: ISBN value; it is converted with ``str()`` before use.
        :return: the stored document or the API result.
        """
        _isbn = str(isbn)
        _book = None
        try:
            self.dbHandler.connect()
            _book = self.dbHandler.find_one(self.collection,
                                            {'isbn13': _isbn})
        except Exception:
            logging.exception('error search book by isbn')
        finally:
            self.dbHandler.disconnect()

        if _book is None:
            _book = self.requestAPI.get_book_by_isbn(_isbn)
        return _book

    def get_book_reviews(self, id, offset=0, limit=5, orderby_time=False):
        """Fetch reviews for a book from the request API.

        :param id: book identifier forwarded to the API.
        :param offset: result offset forwarded to the API.
        :param limit: maximum number of reviews forwarded to the API.
        :param orderby_time: ordering flag forwarded to the API.
        :return: the API result, or ``{}`` when the API returned None.
        """
        _reviews = self.requestAPI.get_book_reviews(id,
                                                    offset=offset,
                                                    limit=limit,
                                                    orderby_time=orderby_time)
        return {} if _reviews is None else _reviews

    def get_ratings(self, id):
        """Return whatever the request API reports as ratings for ``id``."""
        return self.requestAPI.get_ratings(id)
Exemplo n.º 2
0
class RequestService:
    """Book lookups backed by the request API and a database collection.

    Search results fetched from the request API are written to the
    ``douban_book`` collection; ISBN lookups consult that collection first
    and fall back to the request API when nothing is stored.
    """

    def __init__(self):
        self.dbHandler = dbfry.getInterface('mongodb', configs.db)
        self.requestAPI = RequestAPI()
        self.collection = 'douban_book'

    def search_books(self, keyword, tag='', offset=0, limit=1):
        """Search books through the request API and store every hit.

        Each entry under the ``'books'`` key of the API response is inserted
        into the collection. Storage is best effort: a failure is logged and
        the API response is still returned.

        :param keyword: search keyword forwarded to the API.
        :param tag: tag filter forwarded to the API.
        :param offset: result offset forwarded to the API.
        :param limit: maximum number of results forwarded to the API.
        :return: the API response, unchanged (possibly empty or None).
        """
        _res = self.requestAPI.search_books(keyword, tag, offset, limit)
        # ``in`` works on Python 2 and 3; dict.has_key() was removed in
        # Python 3. An empty/None response simply has nothing to store.
        if _res and 'books' in _res:
            try:
                self.dbHandler.connect()
                for _book in _res['books']:
                    self.dbHandler.insert(self.collection, _book)
            except Exception:
                # logging.exception() appends the active traceback itself.
                logging.exception('error save search books')
            finally:
                self.dbHandler.disconnect()
        return _res

    def search_book_by_isbn(self, isbn):
        """Return the book with the given ISBN-13.

        The collection is queried on ``isbn13`` first; when the query finds
        nothing or fails (the failure is logged), the request API is asked.

        :param isbn: ISBN value; it is converted with ``str()`` before use.
        :return: the stored document or the API result.
        """
        _isbn = str(isbn)
        _book = None
        try:
            self.dbHandler.connect()
            _book = self.dbHandler.find_one(self.collection, {'isbn13': _isbn})
        except Exception:
            logging.exception('error search book by isbn')
        finally:
            self.dbHandler.disconnect()

        if _book is None:
            _book = self.requestAPI.get_book_by_isbn(_isbn)
        return _book

    def get_book_reviews(self, id, offset=0, limit=5, orderby_time=False):
        """Fetch reviews for a book from the request API.

        :param id: book identifier forwarded to the API.
        :param offset: result offset forwarded to the API.
        :param limit: maximum number of reviews forwarded to the API.
        :param orderby_time: ordering flag forwarded to the API.
        :return: the API result, or ``{}`` when the API returned None.
        """
        _reviews = self.requestAPI.get_book_reviews(
            id, offset=offset, limit=limit, orderby_time=orderby_time)
        return {} if _reviews is None else _reviews

    def get_ratings(self, id):
        """Return whatever the request API reports as ratings for ``id``."""
        return self.requestAPI.get_ratings(id)
Exemplo n.º 3
0
class RequestService:
    """Extracts comment records from the HTML returned by the request API."""

    def __init__(self):
        self.requestAPI = RequestAPI()

    def search_comment(self, keyword):
        """Search comments for ``keyword`` and parse them out of the HTML.

        :param keyword: search keyword forwarded to the request API.
        :return: list of dicts with the keys ``img`` (value of the image's
            ``data-src`` attribute), ``user``, ``comment``, ``time`` and
            ``num``; ``num`` is a dict holding the last captured value under
            ``'zhan'``. All values are the raw matched strings.
        """
        _x = self.requestAPI.search_comment(keyword)
        # One match per "all_list_node" block. Groups, in order:
        # image data-src, user name, comment body, time, counter text.
        # Raw string so that \s reaches the regex engine untouched.
        _regex = r'<div class="all_list_node">\s+<div[^>]+>\s+<a[^>]+>\s+<img.*?data-src="([^"]+)"[^>]+>\s+</a>\s+</div>\s+<div[^>]+>\s+<div[^>]+>\s+<a[^>]+>([^<]+)</a><span[^<]+</span>\s+<div[^>]+>(.*?)</div>\s+</div>\s+<div[^>]+>\s+<font[^>]+>([^<]+)</font>\s+<span[^>]+>(.*?)</span>\s+</div>'
        _ret = []
        for _img, _user, _comment, _time, _num in re.findall(_regex, _x):
            _ret.append({
                'img': _img,
                'user': _user,
                'comment': _comment,
                'time': _time,
                # NOTE(review): 'zhan' presumably is the like counter - confirm.
                'num': {'zhan': _num},
            })
        return _ret

    def _parse_comment_node(self, node):
        """Collect the image source from the children of a comment element.

        :param node: iterable of elements exposing ``attrib`` and ``find``
            (ElementTree-style API).
        :return: dict with ``'img'`` set to the ``data-src`` of the
            ``a/img`` element found under a child whose class is
            ``node_head``; empty when there is no such image.
        """
        _rs = {}
        for _n in node:
            # Children without a class attribute are skipped instead of
            # raising KeyError. Children with class 'node_content_all' are
            # not parsed yet.
            if _n.attrib.get('class') != 'node_head':
                continue
            _anchor = _n.find('a')
            _img = _anchor.find('img') if _anchor is not None else None
            # Compare with None explicitly: an element without children is
            # falsy, so a plain truth test would discard every <img>.
            if _img is not None:
                _rs['img'] = _img.attrib['data-src']
        return _rs
Exemplo n.º 4
0
 def __init__(self):
     """Set the collection name and create the database and API helpers."""
     # Name of the collection this service works with.
     self.collection = 'douban_book'
     # Database interface requested from dbfry as 'mongodb', configured
     # from configs.db.
     self.dbHandler = dbfry.getInterface('mongodb', configs.db)
     # Client object used for the remote requests.
     self.requestAPI = RequestAPI()
Exemplo n.º 5
0
 def __init__(self):
     """Create the request API client this service delegates to."""
     self.requestAPI = RequestAPI()
Exemplo n.º 6
0
class RequestService:
    """Extracts comment records from the page returned by the request API."""

    def __init__(self):
        self.requestAPI = RequestAPI()

    def search_comment(self, keyword):
        """Search comments for ``keyword`` and parse them out of the page.

        The page is expected to embed the feed list as the ``"html"`` value
        of a ``STK.pageletM.view({...})`` call whose pid is
        ``pl_wb_feedlist``; the markup inside is still backslash-escaped
        and is matched in that escaped form.

        :param keyword: search keyword forwarded to the request API.
        :return: list of dicts with the keys ``img``, ``user``, ``comment``,
            ``time``, ``from`` and ``num``. ``num`` maps ``praises``,
            ``retweets`` and ``reviews`` to the captured digit strings
            (possibly empty), or to ``0`` when no counter text is found.
        """
        _x = self.requestAPI.search_comment(keyword)
        # Group 1: raw "html" value of the feed-list pagelet.
        _regex = r'STK && STK.pageletM && STK.pageletM.view\({"pid":"pl_wb_feedlist",.*?"html":([^}]+)}\)'
        # One match per feed entry. Groups, in order: image src, user,
        # comment text, counter text, time, source.
        _node_regex = '<dl[^>]+>.*?<dt[^>]+>.*?<a[^>]+>.*?<img.*?src=\\\\"([^"]+)"[^>]+>.*?<\\\\/a>.*?<\\\\/dt>.*?<dd[^>]+>.*?<p[^>]+>.*?<a[^>]+>(.*?)<a[^>]+>.*?<\\\\/a>.*?<\\\\/a>.*?<em>(.*?)<\\\\/em>.*?<\\\\/p>.*?<ul[^>]+>.*?<\\\\/ul>.*?<dl[^>]+>.*?<\\\\/dl>.*?<p[^>]+>.*?<span>(.*?)<\\\\/span>.*?<a[^>]+>(.*?)<\\\\/a>.*?<a[^>]+>(.*?)<\\\\/a>'
        # The counter labels appear as escaped \uXXXX sequences; the same
        # pattern is tried with single and then doubled backslashes
        # (second variant added by niuben, 2014-03-19).
        _num_regex = [
            r'\\u8d5e<\\/em>\(?(\d*)\)?.*\\u8f6c\\u53d1\(?(\d*)\)?.*\\u8bc4\\u8bba\(?(\d*)\)?',
            r'\\\\u8d5e<\\\\/em>\(?(\d*)\)?.*\\\\u8f6c\\\\u53d1\(?(\d*)\)?.*\\\\u8bc4\\\\u8bba\(?(\d*)\)?',
        ]
        # Result keys for the three counter groups, in capture order.
        _num_keys = ('praises', 'retweets', 'reviews')
        _ret = []

        for _node in re.findall(_regex, _x):
            for _nd in re.findall(_node_regex, _node):
                m = None
                for _pattern in _num_regex:
                    m = re.search(_pattern, _nd[3])
                    if m is not None:
                        break
                _counts = m.groups() if m is not None else (0, 0, 0)
                _ret.append({
                    'img': _nd[0],
                    'user': _nd[1],
                    'comment': _nd[2],
                    'num': dict(zip(_num_keys, _counts)),
                    'time': _nd[4],
                    'from': _nd[5],
                })
        return _ret

    def _parse_comment_node(self, node):
        """Collect the image source from the children of a comment element.

        :param node: iterable of elements exposing ``attrib`` and ``find``
            (ElementTree-style API).
        :return: dict with ``'img'`` set to the ``data-src`` of the
            ``a/img`` element found under a child whose class is
            ``node_head``; empty when there is no such image.
        """
        _rs = {}
        for _n in node:
            # Children without a class attribute are skipped instead of
            # raising KeyError. Children with class 'node_content_all' are
            # not parsed yet.
            if _n.attrib.get('class') != 'node_head':
                continue
            _anchor = _n.find('a')
            _img = _anchor.find('img') if _anchor is not None else None
            # Compare with None explicitly: an element without children is
            # falsy, so a plain truth test would discard every <img>.
            if _img is not None:
                _rs['img'] = _img.attrib['data-src']
        return _rs
Exemplo n.º 7
0
 def __init__(self):
     """Create the request API client this service delegates to."""
     self.requestAPI = RequestAPI()
Exemplo n.º 8
0
class RequestService:
    """Extracts comment records from the page returned by the request API."""

    def __init__(self):
        self.requestAPI = RequestAPI()

    def search_comment(self, keyword):
        """Search comments for ``keyword`` and parse them out of the page.

        The page is expected to embed the feed list as the ``"html"`` value
        of a ``STK.pageletM.view({...})`` call whose pid is
        ``pl_wb_feedlist``; the markup inside is still backslash-escaped
        and is matched in that escaped form.

        :param keyword: search keyword forwarded to the request API.
        :return: list of dicts with the keys ``img``, ``user``, ``comment``,
            ``time``, ``from`` and ``num``. ``num`` maps ``praises``,
            ``retweets`` and ``reviews`` to the captured digit strings
            (possibly empty), or to ``0`` when no counter text is found.
        """
        _x = self.requestAPI.search_comment(keyword)
        # Group 1: raw "html" value of the feed-list pagelet.
        _regex = r'STK && STK.pageletM && STK.pageletM.view\({"pid":"pl_wb_feedlist",.*?"html":([^}]+)}\)'
        # One match per feed entry. Groups, in order: image src, user,
        # comment text, counter text, time, source.
        _node_regex = '<dl[^>]+>.*?<dt[^>]+>.*?<a[^>]+>.*?<img.*?src=\\\\"([^"]+)"[^>]+>.*?<\\\\/a>.*?<\\\\/dt>.*?<dd[^>]+>.*?<p[^>]+>.*?<a[^>]+>(.*?)<a[^>]+>.*?<\\\\/a>.*?<\\\\/a>.*?<em>(.*?)<\\\\/em>.*?<\\\\/p>.*?<ul[^>]+>.*?<\\\\/ul>.*?<dl[^>]+>.*?<\\\\/dl>.*?<p[^>]+>.*?<span>(.*?)<\\\\/span>.*?<a[^>]+>(.*?)<\\\\/a>.*?<a[^>]+>(.*?)<\\\\/a>'
        # The counter labels appear as escaped \uXXXX sequences; the same
        # pattern is tried with single and then doubled backslashes
        # (second variant added by niuben, 2014-03-19).
        _num_regex = [
            r'\\u8d5e<\\/em>\(?(\d*)\)?.*\\u8f6c\\u53d1\(?(\d*)\)?.*\\u8bc4\\u8bba\(?(\d*)\)?',
            r'\\\\u8d5e<\\\\/em>\(?(\d*)\)?.*\\\\u8f6c\\\\u53d1\(?(\d*)\)?.*\\\\u8bc4\\\\u8bba\(?(\d*)\)?',
        ]
        # Result keys for the three counter groups, in capture order.
        _num_keys = ('praises', 'retweets', 'reviews')
        _ret = []

        for _node in re.findall(_regex, _x):
            for _nd in re.findall(_node_regex, _node):
                m = None
                for _pattern in _num_regex:
                    m = re.search(_pattern, _nd[3])
                    if m is not None:
                        break
                _counts = m.groups() if m is not None else (0, 0, 0)
                _ret.append({
                    'img': _nd[0],
                    'user': _nd[1],
                    'comment': _nd[2],
                    'num': dict(zip(_num_keys, _counts)),
                    'time': _nd[4],
                    'from': _nd[5],
                })
        return _ret

    def _parse_comment_node(self, node):
        """Collect the image source from the children of a comment element.

        :param node: iterable of elements exposing ``attrib`` and ``find``
            (ElementTree-style API).
        :return: dict with ``'img'`` set to the ``data-src`` of the
            ``a/img`` element found under a child whose class is
            ``node_head``; empty when there is no such image.
        """
        _rs = {}
        for _n in node:
            # Children without a class attribute are skipped instead of
            # raising KeyError. Children with class 'node_content_all' are
            # not parsed yet.
            if _n.attrib.get('class') != 'node_head':
                continue
            _anchor = _n.find('a')
            _img = _anchor.find('img') if _anchor is not None else None
            # Compare with None explicitly: an element without children is
            # falsy, so a plain truth test would discard every <img>.
            if _img is not None:
                _rs['img'] = _img.attrib['data-src']
        return _rs
Exemplo n.º 9
0
 def __init__(self):
     """Set the collection name and create the database and API helpers."""
     # Name of the collection this service works with.
     self.collection = 'douban_book'
     # Database interface requested from dbfry as 'mongodb', configured
     # from configs.db.
     self.dbHandler = dbfry.getInterface('mongodb', configs.db)
     # Client object used for the remote requests.
     self.requestAPI = RequestAPI()