示例#1
0
文件: status.py 项目: CCPLab/miner
    def load(self, dic):
        """Populate this status from a raw status dict and queue its writes.

        Sets ``self.uid``, ``self.mid`` and ``self.r_id`` (all as ``long``),
        copies every field named in ``Status.attrs`` onto the instance via
        ``self.setattr``, and appends the batches produced along the way to
        ``self.batches`` in this order: author profile (if embedded), repost
        record, reposted original (if not deleted), user-status record.

        Args:
            dic: Status data. Must carry either a ``'user'`` entry or a
                ``'uid'`` / ``'user_id'`` entry, and an ``'idstr'`` or ``'id'``
                entry. May carry a ``'retweeted_status'`` entry.

        Raises:
            ValueError: If neither user information nor a user id is present,
                or if the supplied user id is a string that ``long()`` cannot
                parse.
            TypeError: From ``long()`` when neither ``'idstr'`` nor ``'id'`` is
                present in ``dic``.
        """
        # Resolve the author of the status.
        user = dic.get('user')
        if user is None:
            uid = dic.get('uid', dic.get('user_id'))
            if uid is None:
                raise ValueError('Data Error: Either User information or user id is expected in the status data!')
            # Coerce to long so self.uid has the same type here as in the
            # embedded-profile branch below (and as self.mid / self.r_id).
            self.uid = long(uid)
        else:
            # Imported at call time, as in the original code
            # (presumably to avoid a circular import -- TODO confirm).
            from profile import UserProfile
            uprofile = UserProfile()
            uprofile.load(user)
            self.uid = long(uprofile.idstr)

            # Queue the author's profile for insertion into the user table.
            self.batches.extend(uprofile.get_batches())

        self.mid = long(dic.get('idstr', dic.get('id')))

        # Handle a reposted status, if the dict embeds one.
        r_id = 0  # stays 0 when this status is not a repost
        repost = dic.get('retweeted_status')
        if repost is not None:
            from weibo import Repost
            # Store the repost in the repost column family.
            retweet = Repost()
            retweet.load(self.uid, self.mid, repost)
            r_id = retweet.id
            self.batches.extend(retweet.get_batches())

            # Keep a separate copy of the reposted status as an original
            # status, unless it has been deleted.
            if repost.get('deleted', 0) == 0:
                original = Status()
                original.load(repost)
                self.batches.extend(original.get_batches())
        self.r_id = long(r_id)

        # Copy the remaining fields; missing counters default to 0, any
        # other missing field is skipped.
        counter_attrs = ('reposts_count', 'comments_count', 'attitudes_count')
        for attr in Status.attrs:
            # The 'key' attribute is sourced from the dict's 'id' entry.
            v = dic.get('id') if attr == 'key' else dic.get(attr)
            if v is None:
                if attr not in counter_attrs:
                    continue
                v = 0

            self.setattr(attr, v)

        ustatus = UserStatuses()
        ustatus.load(self, self.uid)
        self.batches.extend(ustatus.get_batches())
示例#2
0
文件: relation.py 项目: wshuyi/miner
    def get_batches(self):
        """Return the list of batch updates describing this relation set.

        The result contains, in order, the batches produced by a
        ``UserProfile`` for every entry of ``self.users``, followed by one
        dict ``{'tableName': ..., 'rowBatches': ...}`` holding a single
        ``BatchMutation`` for the row keyed by ``self.self_uid``.

        Each mutation in that row targets the column
        ``"<Relation.column_family>:<uid packed as '<q'>"`` and carries a
        value packed as ``'>q'`` with this layout:

        * high 32 bits: ``(verified_type << 3) | (4 if gender == 'm') | relation``
        * low 32 bits: ``util.now2epoch()``

        Raises:
            KeyError: If a uid in ``self.uids`` has no entry in ``self.users``,
                or a user entry lacks ``'gender'`` / ``'verified_type'``.
        """
        # Everything that has to be written; may span several tables and
        # column families.
        pending = []

        # Queue a profile insert for every related user.
        for _, profile_data in self.users.iteritems():
            profile = UserProfile()
            profile.load(profile_data)
            pending.extend(profile.get_batches())

        # Build one mutation per related uid for the relation column family.
        row_mutations = []
        for uid, relation in self.uids.iteritems():
            qualifier = struct.pack('<q', uid)

            info = self.users[uid]
            gender = info['gender']
            raw_type = info['verified_type']
            # Map the verified type if a mapping exists, else use it as-is.
            flags = verified_type_map.get(raw_type, raw_type) << 3
            if gender == 'm':
                flags |= 4
            flags |= relation

            # Flags go in the high half, the current epoch in the low half.
            packed = struct.pack('>q', (flags << 32) | util.now2epoch())
            column = "%s:%s" % (Relation.column_family, qualifier)
            row_mutations.append(Mutation(column=column, value=packed))

        row_key = struct.pack('<q', self.self_uid)
        pending.append({
            'tableName': Relation.tableName,
            'rowBatches': BatchMutation(row=row_key, mutations=row_mutations),
        })

        return pending
示例#3
0
文件: status.py 项目: wshuyi/miner
    def load(self, dic):
        """Fill in this status from a raw status dict and collect its batches.

        Assigns ``self.uid``, ``self.mid`` and ``self.r_id`` (each as a
        ``long``), sets every attribute listed in ``Status.attrs`` through
        ``self.setattr``, and extends ``self.batches`` with, in order: the
        author's profile batches (when ``dic`` embeds a ``'user'``), the
        repost batches, the batches of the reposted original (when it is not
        marked deleted), and the user-status batches.

        Args:
            dic: Status data. Needs a ``'user'`` entry or a ``'uid'`` /
                ``'user_id'`` entry, plus ``'idstr'`` or ``'id'``. An optional
                ``'retweeted_status'`` entry describes a reposted status.

        Raises:
            ValueError: If ``dic`` has neither user information nor a user id,
                or the user id is a string that ``long()`` cannot parse.
            TypeError: From ``long()`` when ``dic`` has neither ``'idstr'`` nor
                ``'id'``.
        """
        # Work out who published the status.
        user = dic.get('user')
        if user is None:
            uid = dic.get('uid', dic.get('user_id'))
            if uid is None:
                raise ValueError(
                    'Data Error: Either User information or user id is expected in the status data!'
                )
            # Convert to long: the embedded-profile branch, self.mid and
            # self.r_id are all longs, so self.uid should not stay a raw
            # dict value here.
            self.uid = long(uid)
        else:
            # Function-scope import kept from the original code
            # (presumably to avoid a circular import -- TODO confirm).
            from profile import UserProfile
            uprofile = UserProfile()
            uprofile.load(user)
            self.uid = long(uprofile.idstr)

            # Queue the user for insertion into the user table.
            self.batches.extend(uprofile.get_batches())

        self.mid = long(dic.get('idstr', dic.get('id')))

        # Process the reposted status, when there is one.
        r_id = 0  # remains 0 for a status that is not a repost
        repost = dic.get('retweeted_status')
        if repost is not None:
            from weibo import Repost
            # Store the repost in the repost column family.
            retweet = Repost()
            retweet.load(self.uid, self.mid, repost)
            r_id = retweet.id
            self.batches.extend(retweet.get_batches())

            # Save the reposted status separately as an original status,
            # but not if it has already been deleted.
            if repost.get('deleted', 0) == 0:
                original = Status()
                original.load(repost)
                self.batches.extend(original.get_batches())
        self.r_id = long(r_id)

        # Process the other fields: absent counters become 0, any other
        # absent field is left unset.
        counter_attrs = ('reposts_count', 'comments_count', 'attitudes_count')
        for attr in Status.attrs:
            # 'key' takes its value from the dict's 'id' entry.
            v = dic.get('id') if attr == 'key' else dic.get(attr)
            if v is None:
                if attr not in counter_attrs:
                    continue
                v = 0

            self.setattr(attr, v)

        ustatus = UserStatuses()
        ustatus.load(self, self.uid)
        self.batches.extend(ustatus.get_batches())