示例#1
0
    def repost_timeline(self, client, fid, limit):
        """Dump the reposts of a status into two text files.

        ``fid`` is decoded with ``misc.decode62`` and used as the ``id``
        argument of ``client.api.repost_timeline`` (100 entries per page,
        starting at page 1).

        Two files are produced in the current working directory:

        * ``<fid>_repost.txt`` (append mode): one record per fetched repost,
          written through ``self._write_user`` in fetch order.
        * ``<fid>_repost2.txt`` (truncated): the same records walked in
          reverse fetch order, keeping only the first record seen per
          ``user.id`` and skipping reposts whose text contains the
          "this weibo has been deleted" marker.

        :param client: object exposing ``api.repost_timeline(**kwds)``.
        :param fid: status identifier; also used as the file name prefix.
        :param limit: page number above which an empty page stops fetching.
        """
        mid = misc.decode62(fid)
        kwds = {
            'id': mid,
            'count': 100,
            'page': 1,
        }
        buf = []

        # ``with`` guarantees the handle is closed even when the API call or
        # ``_write_user`` raises half-way through.
        with open('%s_repost.txt' % fid, 'a') as fh:
            # NOTE: paging is deliberately capped at one iteration here (the
            # unbounded ``while True`` variant was disabled), so an empty page
            # is reported but never actually retried.
            for _ in range(1):
                rs = client.api.repost_timeline(**kwds) or []
                print('page: %s,%s' % (kwds['page'], len(rs)))
                if not rs:
                    if kwds['page'] > limit:
                        break
                    print('retry...')
                    continue

                for e in rs:
                    user = e.user
                    user._txt = e.text.encode('utf8', 'ignore')
                    # Plain scalars: a trailing comma here would silently
                    # store 1-tuples instead of the timestamp / date string.
                    user._ts = int(time.mktime(e.created_at.timetuple()))
                    user._ct = e.created_at.strftime("%Y-%m-%d %H:%M:%S")

                    self._write_user(fh, user)
                    buf.append(user)
                kwds['page'] += 1
                time.sleep(3)  # pause between successive API requests

        seen = set()
        with open('%s_repost2.txt' % fid, 'w') as fh:
            # Reverse fetch order, so the last-fetched repost of each user is
            # the one that is kept.
            for user in reversed(buf):
                if user.id in seen:
                    continue
                if '此微博已被删除' in user._txt:
                    continue
                seen.add(user.id)

                self._write_user(fh, user)
示例#2
0
文件: openapi2.py 项目: Big-Data/ec2
    def repost_timeline(self, client, fid, limit):
        """Fetch the reposts of a status and write them to two text files.

        The status id passed to ``client.api.repost_timeline`` is
        ``misc.decode62(fid)``; requests ask for 100 entries per page
        beginning with page 1.

        Output files (relative to the current working directory):

        * ``<fid>_repost.txt`` is opened in append mode and receives every
          fetched repost, in fetch order, via ``self._write_user``.
        * ``<fid>_repost2.txt`` is overwritten and receives a filtered view:
          records are visited in reverse fetch order, only the first record
          per ``user.id`` is kept, and reposts whose text contains the
          "this weibo has been deleted" marker are dropped.

        :param client: object exposing ``api.repost_timeline(**kwds)``.
        :param fid: status identifier; also the prefix of both file names.
        :param limit: an empty page with a page number above this value ends
            the fetch loop.
        """
        mid = misc.decode62(fid)
        kwds = {
            'id': mid,
            'count': 100,
            'page': 1,
        }
        buf = []

        # Context manager closes the file on every exit path, including
        # exceptions raised by the API client or by ``_write_user``.
        with open('%s_repost.txt' % fid, 'a') as fh:
            # NOTE: the loop runs exactly once (the ``while True`` form was
            # switched off), so only the first page is ever requested and the
            # "retry" branch merely logs before the loop ends.
            for _ in range(1):
                rs = client.api.repost_timeline(**kwds) or []
                print('page: %s,%s' % (kwds['page'], len(rs)))
                if not rs:
                    if kwds['page'] > limit:
                        break
                    print('retry...')
                    continue

                for e in rs:
                    user = e.user
                    user._txt = e.text.encode('utf8', 'ignore')
                    # Store scalars, not 1-tuples: no trailing comma after
                    # the assigned expressions.
                    user._ts = int(time.mktime(e.created_at.timetuple()))
                    user._ct = e.created_at.strftime("%Y-%m-%d %H:%M:%S")

                    self._write_user(fh, user)
                    buf.append(user)
                kwds['page'] += 1
                time.sleep(3)  # pause between successive API requests

        skips = set()
        with open('%s_repost2.txt' % fid, 'w') as fh:
            # Walking the buffer backwards means the most recently fetched
            # record wins when a user appears more than once.
            for user in reversed(buf):
                if user.id in skips:
                    continue
                if '此微博已被删除' in user._txt:
                    continue
                skips.add(user.id)

                self._write_user(fh, user)
示例#3
0
    def repost2_timeline(self, client, fid, limit):
        """Dump every repost of a status as JSON lines to ``<fid>_repost2.txt``.

        ``fid`` is decoded with ``misc.decode62`` and used as the ``id``
        argument of ``client.api.repost_timeline`` (100 entries per page,
        starting at page 1). The output file is truncated on entry and gets
        one ``json.dumps`` object per repost with the keys ``mid``, ``txt``,
        ``ts``, ``ca``, ``user_name`` and ``user_id``.

        The loop stops only on an empty page: immediately when the current
        page number is above ``limit``, otherwise after more than three
        consecutive empty responses. Non-empty pages keep being fetched even
        beyond ``limit``.

        :param client: object exposing ``api.repost_timeline(**kwds)``.
        :param fid: status identifier; also used as the file name prefix.
        :param limit: page number above which an empty page ends the loop.
        """
        mid = misc.decode62(fid)
        kwds = {
            'id': mid,
            'count': 100,
            'page': 1,
        }
        retries = 0

        # ``with`` closes the file even if the API client raises mid-run.
        with open('%s_repost2.txt' % fid, 'w') as fh:
            while True:
                time.sleep(5)  # pause before every request, including the first
                rs = client.api.repost_timeline(**kwds) or []
                print('page: %s,%s' % (kwds['page'], len(rs)))
                if not rs:
                    if kwds['page'] > limit:
                        break
                    print('retry...')
                    retries += 1
                    if retries > 3:
                        break
                    # Same page is requested again: 'page' is not advanced.
                    continue

                retries = 0
                print('... find %s' % len(rs))
                for e in rs:
                    obj = {
                        'mid': e.mid,
                        'txt': e.text.encode('utf8', 'ignore'),
                        'ts': int(time.mktime(e.created_at.timetuple())),
                        'ca': e.created_at.strftime("%Y-%m-%d %H:%M:%S"),
                        'user_name': e.user.screen_name.encode('utf8', 'ignore'),
                        'user_id': e.user.id,
                    }

                    fh.write('%s\n' % json.dumps(obj))
                    # Flush per record so a crash loses at most one line.
                    fh.flush()
                kwds['page'] += 1
示例#4
0
文件: openapi2.py 项目: Big-Data/ec2
    def repost2_timeline(self, client, fid, limit):
        """Write all reposts of a status to ``<fid>_repost2.txt`` as JSON lines.

        The status id sent to ``client.api.repost_timeline`` is
        ``misc.decode62(fid)``; pages of 100 entries are requested starting
        from page 1. The output file is overwritten, and each repost becomes
        one line holding a ``json.dumps`` object with the keys ``mid``,
        ``txt``, ``ts``, ``ca``, ``user_name`` and ``user_id``.

        Termination happens only when a page comes back empty: at once if
        the page number exceeds ``limit``, otherwise once more than three
        empty responses have occurred in a row. A non-empty page is always
        processed, regardless of ``limit``.

        :param client: object exposing ``api.repost_timeline(**kwds)``.
        :param fid: status identifier; also the prefix of the file name.
        :param limit: page number above which an empty page ends the loop.
        """
        mid = misc.decode62(fid)
        kwds = {
            'id': mid,
            'count': 100,
            'page': 1,
        }
        empty_streak = 0

        # The context manager releases the handle on any exit path.
        with open('%s_repost2.txt' % fid, 'w') as fh:
            while True:
                time.sleep(5)  # pause before every request, including the first
                rs = client.api.repost_timeline(**kwds) or []
                print('page: %s,%s' % (kwds['page'], len(rs)))
                if not rs:
                    if kwds['page'] > limit:
                        break
                    print('retry...')
                    empty_streak += 1
                    if empty_streak > 3:
                        break
                    # Retry the same page; the page counter stays put.
                    continue

                empty_streak = 0
                print('... find %s' % len(rs))
                for e in rs:
                    obj = {
                        'mid': e.mid,
                        'txt': e.text.encode('utf8', 'ignore'),
                        'ts': int(time.mktime(e.created_at.timetuple())),
                        'ca': e.created_at.strftime("%Y-%m-%d %H:%M:%S"),
                        'user_name': e.user.screen_name.encode('utf8', 'ignore'),
                        'user_id': e.user.id,
                    }

                    fh.write('%s\n' % json.dumps(obj))
                    # Flushed after each record so partial runs stay usable.
                    fh.flush()
                kwds['page'] += 1