Пример #1
0
    def parse(self, url=None):
        """Parse one page of a user's weibo (microblog) list.

        Downloads *url* (defaults to ``self.url``), stores each new entry as
        a ``MicroBlog`` document, and yields follow-up URLs to crawl: the
        forward/comment/like pages of interesting entries, then the next list
        page.  The generator stops early when the bundle does not exist, the
        page check fails, or only already-crawled entries remain.
        """
        if self.bundle.exists is False:
            return

        url = url or self.url
        params = urldecode(url)
        try:
            br = self.opener.browse_open(url)
        except Exception as e:
            print(e)
            print('休息10分钟!')
            # Back off for 10 minutes, then abort this page: without a
            # response `br` would be unbound below (NameError).
            time.sleep(60 * 10)
            return

        if not self.check(url, br):
            return

        weibo_user = self.get_weibo_user()

        # Rebuild the paging parameters.  Each logical page is served in
        # three chunks: pagebar '0', pagebar '1', then the next full page
        # (count 50).
        params['_t'] = 0
        params['__rnd'] = str(int(time.time() * 1000))
        page = int(params.get('page', 1))
        pre_page = int(params.get('pre_page', 0))
        count = 15
        if 'pagebar' not in params:
            params['pagebar'] = '0'
            pre_page += 1
        elif params['pagebar'] == '0':
            params['pagebar'] = '1'
        elif params['pagebar'] == '1':
            del params['pagebar']
            pre_page = page
            page += 1
            count = 50
        params['count'] = count
        params['page'] = page
        params['pre_page'] = pre_page

        try:
            data = json.loads(br.response().read())['data']
        except Exception as e:
            print(e)
            print('休息10分钟!')
            # Same as above: back off, then abort instead of falling through
            # with `data` unbound.
            time.sleep(60 * 10)
            return
        soup = beautiful_soup(data)
        finished = False

        divs = soup.find_all('div', attrs={'class': 'WB_feed_type'}, mid=True)
        max_id = None
        for div in divs:
            mid = div['mid']
            if len(mid) == 0:
                continue
            max_id = mid

            if 'end_id' not in params:
                params['end_id'] = mid
            if mid in weibo_user.newest_mids:
                # Reached an entry already crawled on a previous run.
                finished = True
                break
            if len(self.bundle.newest_mids) < 3:
                self.bundle.newest_mids.append(mid)

            try:
                mblog = getattr(MicroBlog,
                                'objects').get(Q(mid=mid) & Q(uid=self.uid))
                continue  # assume this entry has already been crawled
            except DoesNotExist:
                mblog = MicroBlog(mid=mid, uid=self.uid)
            content_div = div.find('div',
                                   attrs={
                                       'class': 'WB_text',
                                       'node-type': 'feed_list_content'
                                   })
            # Replace emoticon <img> tags with their textual title.
            for img in content_div.find_all("img", attrs={'type': 'face'}):
                img.replace_with(img['title'])
            mblog.content = content_div.text
            is_forward = div.get('isforward') == '1'
            if is_forward:
                mblog.omid = div['omid']
                name_a = div.find('a',
                                  attrs={
                                      'class': 'WB_name',
                                      'node-type': 'feed_list_originNick'
                                  })
                text_a = div.find('div',
                                  attrs={
                                      'class': 'WB_text',
                                      'node-type': 'feed_list_reason'
                                  })
                if name_a is not None and text_a is not None:
                    mblog.forward = '%s: %s' % (name_a.text, text_a.text)
            mblog.created = parse(div.select('a.S_link2.WB_time')[0]['title'])

            if self.bundle.last_update is None or mblog.created > self.bundle.last_update:
                self.bundle.last_update = mblog.created
            if weibo_user.last_update is not None and \
                mblog.created <= weibo_user.last_update:
                finished = True
                break

            # Like/forward/comment counters live in the last WB_func div;
            # the action-type attribute may be prefixed 'feed_list_' or 'fl_'.
            func_div = div.find_all('div', 'WB_func')[-1]
            action_type_re = lambda t: re.compile("^(feed_list|fl)_%s$" % t)

            likes = func_div.find('a',
                                  attrs={
                                      'action-type': action_type_re("like")
                                  }).text
            likes = likes.strip('(').strip(')')
            likes = 0 if len(likes) == 0 else int(likes)
            mblog.n_likes = likes
            forwards = func_div.find('a',
                                     attrs={
                                         'action-type':
                                         action_type_re("forward")
                                     }).text
            if '(' not in forwards:
                mblog.n_forwards = 0
            else:
                mblog.n_forwards = int(forwards.strip().split('(',
                                                              1)[1].strip(')'))
            comments = func_div.find('a',
                                     attrs={
                                         'action-type':
                                         action_type_re('comment')
                                     }).text
            if '(' not in comments:
                mblog.n_comments = 0
            else:
                mblog.n_comments = int(comments.strip().split('(',
                                                              1)[1].strip(')'))

            # fetch geo info
            map_info = div.find("div", attrs={'class': 'map_data'})
            if map_info is not None:
                geo = Geo()
                geo.location = map_info.text.split('-')[0].strip()
                geo_info = urldecode("?" +
                                     map_info.find('a')['action-data'])['geo']
                geo.longtitude, geo.latitude = tuple(
                    [float(itm) for itm in geo_info.split(',', 1)])
                mblog.geo = geo

            # fetch forwards and comments
            if fetch_forward or fetch_comment or fetch_like:
                query = {'id': mid, '_t': 0, '__rnd': int(time.time() * 1000)}
                query_str = urllib.urlencode(query)
                if fetch_forward and mblog.n_forwards > 0:
                    forward_url = 'http://weibo.com/aj/mblog/info/big?%s' % query_str
                    yield forward_url
                # Only fetch comment pages for entries with more than
                # `fetch_n_comments` comments.
                if fetch_comment and mblog.n_comments > fetch_n_comments:
                    comment_url = 'http://weibo.com/aj/comment/big?%s' % query_str
                    yield comment_url
                if fetch_like and mblog.n_likes > 0:
                    query = {
                        'mid': mid,
                        '_t': 0,
                        '__rnd': int(time.time() * 1000)
                    }
                    query_str = urllib.urlencode(query)
                    like_url = 'http://weibo.com/aj/like/big?%s' % query_str
                    yield like_url

            mblog.save()

        if 'pagebar' in params:
            params['max_id'] = max_id
        else:
            # NOTE(review): assumes 'max_id' was carried in the incoming url
            # once paging began — otherwise this would raise KeyError; confirm.
            del params['max_id']

        # counter add one for the processed weibo list url
        self.counter.inc('processed_weibo_list_page', 1)

        # if not has next page, persist the newest mids into the user document
        if len(divs) == 0 or finished:
            weibo_user = self.get_weibo_user()
            for mid in self.bundle.newest_mids:
                if mid not in weibo_user.newest_mids:
                    weibo_user.newest_mids.append(mid)
            while len(weibo_user.newest_mids) > 3:
                weibo_user.newest_mids.pop()
            weibo_user.last_update = self.bundle.last_update
            weibo_user.save()
            return

        yield '%s?%s' % (url.split('?')[0], urllib.urlencode(params))
Пример #2
0
    def parse(self, url=None):
        """Parse one page of a user's weibo list.

        Returns ``(next_urls, bundles)``: URLs still to crawl (the
        forward/comment/like pages of each new entry plus the next list
        page) and an always-empty bundle list.
        """
        if self.bundle.exists == False:
            return [], []

        url = url or self.url
        params = urldecode(url)
        br = self.opener.browse_open(url)
        self.logger.debug("load %s finish" % url)

        if not self.check(url, br):
            return [], []

        weibo_user = self.get_weibo_user()

        # Rebuild paging parameters; each logical page is served in three
        # chunks: pagebar "0", pagebar "1", then the next full page (count 50).
        params["_t"] = 0
        params["__rnd"] = str(int(time.time() * 1000))
        page = int(params.get("page", 1))
        pre_page = int(params.get("pre_page", 0))
        count = 15
        if "pagebar" not in params:
            params["pagebar"] = "0"
            pre_page += 1
        elif params["pagebar"] == "0":
            params["pagebar"] = "1"
        elif params["pagebar"] == "1":
            del params["pagebar"]
            pre_page = page
            page += 1
            count = 50
        params["count"] = count
        params["page"] = page
        params["pre_page"] = pre_page

        data = json.loads(br.response().read())["data"]
        soup = beautiful_soup(data)
        finished = False

        divs = soup.find_all("div", attrs={"class": "WB_feed_type"}, mid=True)
        max_id = None
        next_urls = []
        for div in divs:
            mid = div["mid"]
            if len(mid) == 0:
                continue
            max_id = mid

            if "end_id" not in params:
                params["end_id"] = mid
            if mid in weibo_user.newest_mids:
                # Reached an entry already crawled on a previous run.
                finished = True
                break
            if len(self.bundle.newest_mids) < 3:
                self.bundle.newest_mids.append(mid)

            # Reuse the stored document if it exists so its counters are
            # refreshed; otherwise create a new one.
            try:
                mblog = getattr(MicroBlog, "objects").get(Q(mid=mid) & Q(uid=self.uid))
            except DoesNotExist:
                mblog = MicroBlog(mid=mid, uid=self.uid)
            content_div = div.find("div", attrs={"class": "WB_text", "node-type": "feed_list_content"})
            # Replace emoticon <img> tags with their textual title.
            for img in content_div.find_all("img", attrs={"type": "face"}):
                img.replace_with(img["title"])
            mblog.content = content_div.text
            is_forward = div.get("isforward") == "1"
            if is_forward:
                name_a = div.find("a", attrs={"class": "WB_name", "node-type": "feed_list_originNick"})
                text_a = div.find("div", attrs={"class": "WB_text", "node-type": "feed_list_reason"})
                if name_a is not None and text_a is not None:
                    mblog.forward = "%s: %s" % (name_a.text, text_a.text)
            mblog.created = parse(div.select("a.S_link2.WB_time")[0]["title"])

            if self.bundle.last_update is None or mblog.created > self.bundle.last_update:
                self.bundle.last_update = mblog.created
            if weibo_user.last_update is not None and mblog.created <= weibo_user.last_update:
                finished = True
                break

            likes = div.find("a", attrs={"action-type": "feed_list_like"}).text
            likes = likes.strip("(").strip(")")
            likes = 0 if len(likes) == 0 else int(likes)
            mblog.n_likes = likes
            forwards = div.find("a", attrs={"action-type": "feed_list_forward"}).text
            if "(" not in forwards:
                mblog.n_forwards = 0
            else:
                mblog.n_forwards = int(forwards.strip().split("(", 1)[1].strip(")"))
            comments = div.find("a", attrs={"action-type": "feed_list_comment"}).text
            if "(" not in comments:
                mblog.n_comments = 0
            else:
                mblog.n_comments = int(comments.strip().split("(", 1)[1].strip(")"))

            # fetch geo info
            map_info = div.find("div", attrs={"class": "map_data"})
            if map_info is not None:
                geo = Geo()
                geo.location = map_info.text.split("-")[0].strip()
                geo_info = urldecode("?" + map_info.find("a")["action-data"])["geo"]
                geo.longtitude, geo.latitude = tuple([float(itm) for itm in geo_info.split(",", 1)])
                mblog.geo = geo

            # fetch forwards and comments
            if fetch_forward or fetch_comment or fetch_like:
                query = {"id": mid, "_t": 0, "__rnd": int(time.time() * 1000)}
                query_str = urllib.urlencode(query)
                if fetch_forward and mblog.n_forwards > 0:
                    # Forward lists are served by /aj/mblog/info/big; this URL
                    # and the comment URL were previously swapped.
                    forward_url = "http://weibo.com/aj/mblog/info/big?%s" % query_str
                    next_urls.append(forward_url)
                if fetch_comment and mblog.n_comments > 0:
                    comment_url = "http://weibo.com/aj/comment/big?%s" % query_str
                    next_urls.append(comment_url)
                if fetch_like and mblog.n_likes > 0:
                    query = {"mid": mid, "_t": 0, "__rnd": int(time.time() * 1000)}
                    query_str = urllib.urlencode(query)
                    like_url = "http://weibo.com/aj/like/big?%s" % query_str
                    next_urls.append(like_url)

            mblog.save()

        if "pagebar" in params:
            params["max_id"] = max_id
        else:
            del params["max_id"]
        self.logger.debug("parse %s finish" % url)

        # if not has next page, persist the newest mids into the user document
        if len(divs) == 0 or finished:
            weibo_user = self.get_weibo_user()
            for mid in self.bundle.newest_mids:
                # Compare against the user's list (previously this compared
                # against self.bundle.newest_mids itself, making it a no-op).
                if mid not in weibo_user.newest_mids:
                    weibo_user.newest_mids.append(mid)
            while len(weibo_user.newest_mids) > 3:
                weibo_user.newest_mids.pop()
            weibo_user.last_update = self.bundle.last_update
            weibo_user.save()
            return [], []

        next_urls.append("%s?%s" % (url.split("?")[0], urllib.urlencode(params)))
        return next_urls, []
Пример #3
0
 def parse(self, url=None):
     """Parse one page of a user's weibo list.

     Returns ``(next_urls, bundles)``: URLs still to crawl (the
     forward/comment/like pages of each new entry plus the next list page)
     and an always-empty bundle list.
     """
     if self.bundle.exists == False:
         return [], []

     url = url or self.url
     params = urldecode(url)
     br = self.opener.browse_open(url)
     self.logger.debug('load %s finish' % url)

     if not self.check(url, br):
         return [], []

     weibo_user = self.get_weibo_user()

     # Rebuild paging parameters; each logical page is served in three
     # chunks: pagebar '0', pagebar '1', then the next full page (count 50).
     params['_t'] = 0
     params['__rnd'] = str(int(time.time() * 1000))
     page = int(params.get('page', 1))
     pre_page = int(params.get('pre_page', 0))
     count = 15
     if 'pagebar' not in params:
         params['pagebar'] = '0'
         pre_page += 1
     elif params['pagebar'] == '0':
         params['pagebar'] = '1'
     elif params['pagebar'] == '1':
         del params['pagebar']
         pre_page = page
         page += 1
         count = 50
     params['count'] = count
     params['page'] = page
     params['pre_page'] = pre_page

     data = json.loads(br.response().read())['data']
     soup = beautiful_soup(data)
     finished = False

     divs = soup.find_all('div', attrs={'class': 'WB_feed_type'}, mid=True)
     max_id = None
     next_urls = []
     for div in divs:
         mid = div['mid']
         if len(mid) == 0:
             continue
         max_id = mid

         if 'end_id' not in params:
             params['end_id'] = mid
         if mid in weibo_user.newest_mids:
             # Reached an entry already crawled on a previous run.
             finished = True
             break
         if len(self.bundle.newest_mids) < 3:
             self.bundle.newest_mids.append(mid)

         # Reuse the stored document if it exists; otherwise create one.
         try:
             mblog = getattr(MicroBlog, 'objects').get(Q(mid=mid)&Q(uid=self.uid))
         except DoesNotExist:
             mblog = MicroBlog(mid=mid, uid=self.uid)
         content_div = div.find('div', attrs={
             'class': 'WB_text',
             'node-type': 'feed_list_content'
         })
         # Replace emoticon <img> tags with their textual title.
         for img in content_div.find_all("img", attrs={'type': 'face'}):
             img.replace_with(img['title'])
         mblog.content = content_div.text
         is_forward = div.get('isforward') == '1'
         if is_forward:
             name_a = div.find('a', attrs={
                 'class': 'WB_name',
                 'node-type': 'feed_list_originNick'
             })
             text_a = div.find('div', attrs={
                 'class': 'WB_text',
                 'node-type': 'feed_list_reason'
             })
             if name_a is not None and text_a is not None:
                 mblog.forward = '%s: %s' % (
                     name_a.text,
                     text_a.text
                 )
         mblog.created = parse(div.select('a.S_link2.WB_time')[0]['title'])

         if self.bundle.last_update is None or mblog.created > self.bundle.last_update:
             self.bundle.last_update = mblog.created
         if weibo_user.last_update is not None and \
             mblog.created <= weibo_user.last_update:
             finished = True
             break

         likes = div.find('a', attrs={'action-type': 'feed_list_like'}).text
         likes = likes.strip('(').strip(')')
         likes = 0 if len(likes) == 0 else int(likes)
         mblog.n_likes = likes
         forwards = div.find('a', attrs={'action-type': 'feed_list_forward'}).text
         if '(' not in forwards:
             mblog.n_forwards = 0
         else:
             mblog.n_forwards = int(forwards.strip().split('(', 1)[1].strip(')'))
         comments = div.find('a', attrs={'action-type': 'feed_list_comment'}).text
         if '(' not in comments:
             mblog.n_comments = 0
         else:
             mblog.n_comments = int(comments.strip().split('(', 1)[1].strip(')'))

         # fetch geo info
         map_info = div.find("div", attrs={'class': 'map_data'})
         if map_info is not None:
             geo = Geo()
             geo.location = map_info.text.split('-')[0].strip()
             geo_info = urldecode("?"+map_info.find('a')['action-data'])['geo']
             geo.longtitude, geo.latitude = tuple([float(itm) for itm in geo_info.split(',', 1)])
             mblog.geo = geo

         # fetch forwards and comments
         if fetch_forward or fetch_comment or fetch_like:
             query = {'id': mid, '_t': 0, '__rnd': int(time.time()*1000)}
             query_str = urllib.urlencode(query)
             if fetch_forward and mblog.n_forwards > 0:
                 # Forward lists are served by /aj/mblog/info/big; this URL
                 # and the comment URL were previously swapped.
                 forward_url = 'http://weibo.com/aj/mblog/info/big?%s' % query_str
                 next_urls.append(forward_url)
             if fetch_comment and mblog.n_comments > 0:
                 comment_url = 'http://weibo.com/aj/comment/big?%s' % query_str
                 next_urls.append(comment_url)
             if fetch_like and mblog.n_likes > 0:
                 query = {'mid': mid, '_t': 0, '__rnd': int(time.time()*1000)}
                 query_str = urllib.urlencode(query)
                 like_url = 'http://weibo.com/aj/like/big?%s' % query_str
                 next_urls.append(like_url)

         mblog.save()

     if 'pagebar' in params:
         params['max_id'] = max_id
     else:
         del params['max_id']
     self.logger.debug('parse %s finish' % url)

     # if not has next page, persist the newest mids into the user document
     if len(divs) == 0 or finished:
         weibo_user = self.get_weibo_user()
         for mid in self.bundle.newest_mids:
             # Compare against the user's list (previously this compared
             # against self.bundle.newest_mids itself, making it a no-op).
             if mid not in weibo_user.newest_mids:
                 weibo_user.newest_mids.append(mid)
         while len(weibo_user.newest_mids) > 3:
             weibo_user.newest_mids.pop()
         weibo_user.last_update = self.bundle.last_update
         weibo_user.save()
         return [], []

     next_urls.append('%s?%s'%(url.split('?')[0], urllib.urlencode(params)))
     return next_urls, []
Пример #4
0
    def parse(self, url=None):
        """Parse one page of a user's weibo list, restricted to a time window.

        Like the plain list parser, but only records entries whose creation
        time falls inside the [start, end] window read from timevalue.txt;
        entries older than the window abort the whole page.  Returns
        ``(next_urls, bundles)``.
        """
        if self.bundle.exists == False:
            return [], []

        url = url or self.url
        params = urldecode(url)
        br = self.opener.browse_open(url)
        self.logger.debug('load %s finish' % url)

        if not self.check(url, br):
            return [], []

        weibo_user = self.get_weibo_user()

        # Rebuild paging parameters; each logical page is served in three
        # chunks: pagebar '0', pagebar '1', then the next full page (count 50).
        params['_t'] = 0
        params['__rnd'] = str(int(time.time() * 1000))
        page = int(params.get('page', 1))
        pre_page = int(params.get('pre_page', 0))
        count = 15
        if 'pagebar' not in params:
            params['pagebar'] = '0'
            pre_page += 1
        elif params['pagebar'] == '0':
            params['pagebar'] = '1'
        elif params['pagebar'] == '1':
            del params['pagebar']
            pre_page = page
            page += 1
            count = 50
        params['count'] = count
        params['page'] = page
        params['pre_page'] = pre_page

        data = json.loads(br.response().read())['data']
        soup = beautiful_soup(data)
        finished = False

        divs = soup.find_all('div', attrs={'class': 'WB_feed_type'}, mid=True)
        max_id = None
        next_urls = []
        for div in divs:
            mid = div['mid']
            if len(mid) == 0:
                continue
            max_id = mid

            if 'end_id' not in params:
                params['end_id'] = mid
            if mid in weibo_user.newest_mids:
                # Reached an entry already crawled on a previous run.
                finished = True
                break
            if len(self.bundle.newest_mids) < 3:
                self.bundle.newest_mids.append(mid)

            # Reuse the stored document if it exists; otherwise create one.
            try:
                mblog = getattr(MicroBlog, 'objects').get(Q(mid=mid)&Q(uid=self.uid))
            except DoesNotExist:
                mblog = MicroBlog(mid=mid, uid=self.uid)
            content_div = div.find('div', attrs={
                'class': 'WB_text',
                'node-type': 'feed_list_content'
            })
            # Replace emoticon <img> tags with their textual title.
            for img in content_div.find_all("img", attrs={'type': 'face'}):
                img.replace_with(img['title'])
            mblog.content = content_div.text
            is_forward = div.get('isforward') == '1'
            if is_forward:
                mblog.omid = div['omid']
                name_a = div.find('a', attrs={
                    'class': 'WB_name',
                    'node-type': 'feed_list_originNick'
                })
                text_a = div.find('div', attrs={
                    'class': 'WB_text',
                    'node-type': 'feed_list_reason'
                })
                if name_a is not None and text_a is not None:
                    mblog.forward = '%s: %s' % (
                        name_a.text,
                        text_a.text
                    )

            # Convert the entry's creation time into a unix timestamp so it
            # can be compared against the configured crawl window.
            temp = parse(div.select('a.S_link2.WB_time')[0]['title'])
            tempstring = temp.strftime("%Y-%m-%d-%H-%M-%S")
            fields = tempstring.split('-')  # renamed: `list` shadowed the builtin
            tempyear = fields[0]
            tempmonth = fields[1]
            tempday = fields[2]
            temphour = fields[3]
            tempmin = fields[4]
            tempsec = fields[5]
            temptime = time.mktime(datetime(int(tempyear), int(tempmonth), int(tempday), int(temphour), int(tempmin), int(tempsec)).timetuple())
            print(temptime)

            # NOTE(review): hardcoded absolute path — should come from
            # configuration; kept for behavior compatibility.
            with open("D:\\09Limited_buffer\\earlywarningbyci\\cola\\contrib\\weibo\\timevalue.txt", "r") as timevalue:
                time_re = timevalue.readline()
            fields = time_re.split()
            starttime = fields[0]
            endtime = fields[1]
            print(starttime)
            temptime = round(float(temptime))
            starttime = round(float(starttime))
            endtime = round(float(endtime))
            if temptime >= starttime and temptime <= endtime:
                mblog.created = temp
                print("------OKOKOKOKOKOKOKOKOKOKOKOKOKOKOKOKOKOK-----")
            else:
                # Older than the window start: later entries are older still,
                # so give up on this page entirely.
                if temptime < starttime:
                    print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
                    time.sleep(5)
                    return [], []
                # NOTE(review): when temptime > endtime, mblog.created stays
                # unset yet the entry is still processed below — confirm.

            if self.bundle.last_update is None or mblog.created > self.bundle.last_update:
                self.bundle.last_update = mblog.created
            if weibo_user.last_update is not None and \
                mblog.created <= weibo_user.last_update:
                finished = True
                break

            # Like/forward/comment counters live in the last WB_func div;
            # the action-type attribute may be prefixed 'feed_list_' or 'fl_'.
            func_div = div.find_all('div', 'WB_func')[-1]
            action_type_re = lambda t: re.compile("^(feed_list|fl)_%s$" % t)

            likes = func_div.find('a', attrs={'action-type': action_type_re("like")}).text
            likes = likes.strip('(').strip(')')
            likes = 0 if len(likes) == 0 else int(likes)
            mblog.n_likes = likes
            forwards = func_div.find('a', attrs={'action-type': action_type_re("forward")}).text
            if '(' not in forwards:
                mblog.n_forwards = 0
            else:
                mblog.n_forwards = int(forwards.strip().split('(', 1)[1].strip(')'))
            comments = func_div.find('a', attrs={'action-type': action_type_re('comment')}).text
            if '(' not in comments:
                mblog.n_comments = 0
            else:
                mblog.n_comments = int(comments.strip().split('(', 1)[1].strip(')'))

            # fetch geo info
            map_info = div.find("div", attrs={'class': 'map_data'})
            if map_info is not None:
                geo = Geo()
                geo.location = map_info.text.split('-')[0].strip()
                geo_info = urldecode("?"+map_info.find('a')['action-data'])['geo']
                geo.longtitude, geo.latitude = tuple([float(itm) for itm in geo_info.split(',', 1)])
                mblog.geo = geo

            # fetch forwards and comments
            if fetch_forward or fetch_comment or fetch_like:
                query = {'id': mid, '_t': 0, '__rnd': int(time.time()*1000)}
                query_str = urllib.urlencode(query)
                if fetch_forward and mblog.n_forwards > 0:
                    forward_url = 'http://weibo.com/aj/mblog/info/big?%s' % query_str
                    next_urls.append(forward_url)
                if fetch_comment and mblog.n_comments > 0:
                    comment_url = 'http://weibo.com/aj/comment/big?%s' % query_str
                    next_urls.append(comment_url)
                if fetch_like and mblog.n_likes > 0:
                    query = {'mid': mid, '_t': 0, '__rnd': int(time.time()*1000)}
                    query_str = urllib.urlencode(query)
                    like_url = 'http://weibo.com/aj/like/big?%s' % query_str
                    next_urls.append(like_url)

            mblog.save()

        if 'pagebar' in params:
            params['max_id'] = max_id
        else:
            del params['max_id']
        self.logger.debug('parse %s finish' % url)

        # if not has next page, persist the newest mids into the user document
        if len(divs) == 0 or finished:
            weibo_user = self.get_weibo_user()
            for mid in self.bundle.newest_mids:
                if mid not in weibo_user.newest_mids:
                    weibo_user.newest_mids.append(mid)
            while len(weibo_user.newest_mids) > 3:
                weibo_user.newest_mids.pop()
            weibo_user.last_update = self.bundle.last_update
            weibo_user.save()
            return [], []

        next_urls.append('%s?%s'%(url.split('?')[0], urllib.urlencode(params)))
        return next_urls, []
Пример #5
0
    def parse(self, url=None):
        if self.bundle.exists is False:
            return

        url = url or self.url
        params = urldecode(url)
        try:
            br = self.opener.browse_open(url)
        except URLError:
            raise FetchBannedError()

        if not self.check(url, br):
            return

        weibo_user = self.get_weibo_user()

        params['_t'] = 0
        params['__rnd'] = str(int(time.time() * 1000))
        page = int(params.get('page', 1))
        pre_page = int(params.get('pre_page', 0))
        count = 15
        if 'pagebar' not in params:
            params['pagebar'] = '0'
            pre_page += 1
        elif params['pagebar'] == '0':
            params['pagebar'] = '1'
        elif params['pagebar'] == '1':
            del params['pagebar']
            pre_page = page
            page += 1
            count = 50
        params['count'] = count
        params['page'] = page
        params['pre_page'] = pre_page

        try:
            data = json.loads(br.response().read())['data']
        except (ValueError, KeyError):
            raise FetchBannedError('fetch banned by weibo server')
        soup = beautiful_soup(data)
        finished = False

        divs = soup.find_all('div', attrs={'class': 'WB_feed_type'}, mid=True)
        max_id = None
        for div in divs:
            mid = div['mid']
            if len(mid) == 0:
                continue
            max_id = mid
            blog_create_date = parse(
                div.select('a.S_link2.WB_time')[0]['title'])
            # skip all following blogs if create date less than effective start date
            if (blog_create_date - effective_start_date).days < 0:
                self.logger.info(
                    "%s: blog has sync up after %s" %
                    (self.uid, effective_start_date.strftime("%Y%m%d")))
                finished = True
                break

            if 'end_id' not in params:
                params['end_id'] = mid
            # skip
            #if weibo_user.newest_mids and not mid in weibo_user.newest_mids:
            #    self.logger.info("%s: reach earliest blog %s" % (self.uid,mid))
            #    finished = True
            #    break
            if len(self.bundle.newest_mids) < 3:
                self.bundle.newest_mids.append(mid)

            try:
                mblog = getattr(MicroBlog,
                                'objects').get(Q(mid=mid) & Q(uid=self.uid))
            except DoesNotExist:
                mblog = MicroBlog(mid=mid, uid=self.uid)
            content_div = div.find('div',
                                   attrs={
                                       'class': 'WB_text',
                                       'node-type': 'feed_list_content'
                                   })
            for img in content_div.find_all("img", attrs={'type': 'face'}):
                img.replace_with(img['title'])
            mblog.content = content_div.text
            is_forward = div.get('isforward')
            if is_forward:
                # write origional user, msg
                mblog.omid = div['omid']
                tbinfos = div['tbinfo'].split('&')
                mblog.ouid = tbinfos[0].split('=')[1]
                name_a = div.find('a',
                                  attrs={
                                      'class': 'WB_name',
                                      'node-type': 'feed_list_originNick'
                                  })
                text_a = div.find('div',
                                  attrs={
                                      'class': 'WB_text',
                                      'node-type': 'feed_list_reason'
                                  })
                if name_a is not None and text_a is not None:
                    mblog.forward = '%s: %s' % (name_a.text, text_a.text)
            mblog.created = blog_create_date
            mblog.last_update = datetime.now()

            func_div = div.find_all('div', 'WB_func')[-1]
            action_type_re = lambda t: re.compile("^(feed_list|fl)_%s$" % t)

            likes = func_div.find('a',
                                  attrs={
                                      'action-type': action_type_re("like")
                                  }).text
            likes = likes.strip('(').strip(')')
            likes = 0 if len(likes) == 0 else int(likes)
            mblog.n_likes = likes
            forwards = func_div.find('a',
                                     attrs={
                                         'action-type':
                                         action_type_re("forward")
                                     }).text
            if '(' not in forwards:
                mblog.n_forwards = 0
            else:
                mblog.n_forwards = int(forwards.strip().split('(',
                                                              1)[1].strip(')'))
            comments = func_div.find('a',
                                     attrs={
                                         'action-type':
                                         action_type_re('comment')
                                     }).text
            if '(' not in comments:
                mblog.n_comments = 0
            else:
                mblog.n_comments = int(comments.strip().split('(',
                                                              1)[1].strip(')'))

            # fetch geo info
            map_info = div.find("div", attrs={'class': 'map_data'})
            if map_info is not None:
                geo = Geo()
                geo.location = map_info.text.split('-')[0].strip()
                geo_info = urldecode("?" +
                                     map_info.find('a')['action-data'])['geo']
                geo.longtitude, geo.latitude = tuple(
                    [float(itm) for itm in geo_info.split(',', 1)])
                mblog.geo = geo
            # has_video
            div_video = div.find(
                'div', attrs={'node-type': 'fl_h5_video_disp'}) or div.find(
                    'span', attrs={'class': 'icon_playvideo'})
            mblog.has_video = True if div_video else False
            mblog.save()
            self.counter.inc('processed_weibo_posts', 1)

            # fetch forwards and comments
            if self.uid in starts:
                query = {'id': mid, '_t': 0, '__rnd': int(time.time() * 1000)}
                query_str = urllib.urlencode(query)
                if fetch_forward and mblog.n_forwards > 0:
                    forward_url = 'http://weibo.com/aj/mblog/info/big?%s' % query_str
                    yield forward_url
                if fetch_comment and mblog.n_comments > 0:
                    comment_url = 'http://weibo.com/aj/comment/big?%s' % query_str
                    yield comment_url
                if fetch_like and mblog.n_likes > 0:
                    query = {
                        'mid': mid,
                        '_t': 0,
                        '__rnd': int(time.time() * 1000)
                    }
                    query_str = urllib.urlencode(query)
                    like_url = 'http://weibo.com/aj/like/big?%s' % query_str
                    yield like_url

            yield '%s?%s' % (url.split('?')[0], urllib.urlencode(params))

        if params.has_key('pagebar'):
            params['max_id'] = max_id
        elif params.has_key('max_id'):
            del params['max_id']


#         self.logger.debug('parse %s finish' % url)

# counter add one for the processed weibo list url
        self.counter.inc('processed_weibo_list_page', 1)

        # if not has next page
        if len(divs) == 0 or finished:
            weibo_user = self.get_weibo_user()
            for mid in self.bundle.newest_mids:
                if mid not in weibo_user.newest_mids:
                    weibo_user.newest_mids.append(mid)
            while len(weibo_user.newest_mids) > 3:
                weibo_user.newest_mids.pop()
            weibo_user.last_update = self.bundle.last_update
            weibo_user.save()
            return
Example #6
0
    def save_blog_detail(self, div, mid):
        """Parse one weibo post element and persist it as a MicroBlog.

        :param div: BeautifulSoup element for a single feed entry; assumed
            to contain a ``WB_text`` content div, a dated anchor and a
            ``feed_list_options`` div — TODO confirm against the page
            markup this crawler currently targets.
        :param mid: weibo message id; together with ``self.uid`` it forms
            the lookup key for the MicroBlog document.
        :return: the saved ``MicroBlog`` instance.
        """
        # EAFP: fetch the existing record for (mid, uid) or start a new one.
        try:
            mblog = getattr(MicroBlog,
                            'objects').get(Q(mid=mid) & Q(uid=self.uid))
        except DoesNotExist:
            mblog = MicroBlog(mid=mid, uid=self.uid)

        content_div = div.find('div',
                               attrs={
                                   'class': 'WB_text',
                                   'node-type': 'feed_list_content'
                               })
        blog_create_date = parse(
            div.find('a', attrs={'node-type': 'feed_list_item_date'})['title'])

        # Replace emoticon <img> tags with their textual titles so the
        # face descriptions survive the ``.text`` extraction below.
        for img in content_div.find_all("img", attrs={'type': 'face'}):
            img.replace_with(img['title'])
        mblog.content = content_div.text

        if div.get('isforward'):
            # Forwarded post: record the original user and message.
            mblog.omid = div['omid']
            tbinfos = div['tbinfo'].split('&')
            mblog.ouid = tbinfos[0].split('=')[1]
            name_a = div.find('a',
                              attrs={
                                  'class': 'WB_name',
                                  'node-type': 'feed_list_originNick'
                              })
            text_a = div.find('div',
                              attrs={
                                  'class': 'WB_text',
                                  'node-type': 'feed_list_reason'
                              })
            if name_a is not None and text_a is not None:
                mblog.forward = '%s: %s' % (name_a.text, text_a.text)
        mblog.created = blog_create_date
        mblog.last_update = datetime.now()

        func_div = div.find_all('div',
                                attrs={'node-type': 'feed_list_options'})[-1]
        # Action-type attributes appear as either "feed_list_<t>" or "fl_<t>".
        action_type_re = lambda t: re.compile("^(feed_list|fl)_%s$" % t)

        def _counter_value(action):
            # Extract the numeric counter shown on the like/forward/comment
            # link; 0 when the counter is empty or non-numeric (e.g. "赞").
            text = func_div.find('a',
                                 attrs={
                                     'action-type': action_type_re(action)
                                 }).find_all('em')[1].text
            text = text.strip('(').strip(')').replace(',', '')
            return int(text) if text and text.isdigit() else 0

        mblog.n_likes = _counter_value('like')
        mblog.n_forwards = _counter_value('forward')
        mblog.n_comments = _counter_value('comment')

        # Optional geo information embedded in a "map_data" div.
        map_info = div.find("div", attrs={'class': 'map_data'})
        if map_info is not None:
            geo = Geo()
            geo.location = map_info.text.split('-')[0].strip()
            geo_info = urldecode("?" +
                                 map_info.find('a')['action-data'])['geo']
            # NOTE: field name ``longtitude`` (sic) matches the Geo model.
            geo.longtitude, geo.latitude = (float(itm)
                                            for itm in geo_info.split(',', 1))
            mblog.geo = geo

        # Post embeds either an HTML5 video container or a play icon.
        div_video = div.find('div', attrs={
            'node-type': 'fl_h5_video_disp'
        }) or div.find('span', attrs={'class': 'icon_playvideo'})
        mblog.has_video = bool(div_video)
        mblog.save()
        return mblog