Пример #1
0
    def parse_recent_visit(self, file_path, time_step):
        """Poll the main page for recent visitors and dump them to Excel.

        This loops forever, so it has to be run in a thread of its own.
        Every round fetches the main page, appends each visitor found to
        ``self.visit_list``, rewrites ``file_path`` with the de-duplicated
        records and then sleeps for ``time_step`` seconds.

        :param file_path: destination handed to ``DataFrame.to_excel``
        :param time_step: pause between two polls, in seconds
        :raises SystemExit: with code 3 as soon as any step of a round fails
        """
        while True:
            try:
                url, _ = self.get_main_page_url()
                res = self.req.get(url=url, headers=self.headers)
                page_text = res.content.decode("utf-8")
                content = json.loads(self.get_json(page_text))
                visit_data = content['data']['module_3']['data']
                # The 'items' key is only read when present; a round without
                # it simply re-exports what has been collected so far.
                if 'items' in visit_data:
                    for item in visit_data['items']:
                        self.visit_list.append(
                            dict(qq=item['uin'],
                                 time=util.get_full_time_from_mktime(
                                     item['time']),
                                 name=item['name']))

                # self.visit_list accumulates across rounds, so the same
                # visitor shows up repeatedly; duplicates are dropped only in
                # the exported frame.
                visit_df = pd.DataFrame(self.visit_list)
                visit_df.drop_duplicates(inplace=True)
                visit_df.to_excel(file_path, index=False)
                time.sleep(time_step)
            except Exception as err:
                # Report the actual cause instead of hiding it, then stop.
                print("获取最近访客出错", err)
                # Raise SystemExit directly: the exit() helper is injected by
                # the `site` module and is not guaranteed to exist.
                raise SystemExit(3)
Пример #2
0
    def __init__(self):
        """Run the interactive QQ Zone raffle console program.

        The constructor drives the whole session:

        1. creates the spider, asks for the visitor polling interval
           (default 60 seconds) and logs in with a QR code;
        2. starts a background thread that records recent visitors;
        3. fetches three pages of posts (offsets 0, 20 and 40) and lists
           them;
        4. loops forever asking which post to use, the raffle type and the
           number of winners, waits for the like/comment data to be fetched,
           runs the raffle and writes the comment and like tables to Excel.

        :raises SystemExit: code 1 when the login fails, code 2 when the
            main loop sees an exception whose text is ``'2'``
        :raises KeyboardInterrupt: when the user presses Ctrl+C in the main
            loop
        """
        self.sp = QQZoneSpider(use_redis=False,
                               debug=False,
                               from_client=True,
                               mood_num=20)
        warm_tip = "****************************************\n" \
                   "**************QQ空间抽奖小程序***************\n" \
                   "****************************************"
        self.output(warm_tip)
        self.output("请输入获取最近访客的时间间隔,默认为60秒")
        try:
            time_step = int(input())
        except ValueError:
            # Empty or non-numeric answer: use the advertised default.
            time_step = 60
        visit_file_name = "最近访客" + get_full_time_from_mktime(int(
            time.time())) + ".xlsx"
        self.output("最近访客文件名:" + visit_file_name)

        try:
            self.sp.login_with_qr_code()
            self.output("用户" + self.sp.username + "登陆成功!")
        except BaseException:
            self.output("用户登陆失败!请检查网络连接或稍后再试!")
            # Raise SystemExit directly; the exit() helper comes from the
            # `site` module and is not guaranteed to exist.
            raise SystemExit(1)

        # NOTE(review): this thread is not a daemon and its target loops
        # forever, so it may keep the process alive after the main thread
        # exits - confirm whether that is intended.
        visit_t = threading.Thread(target=self.sp.parse_recent_visit,
                                   args=[visit_file_name, time_step])
        visit_t.start()
        self.output("正在获取最近的说说...")
        url_mood = self.sp.get_mood_url()
        url_mood = url_mood + '&uin=' + str(self.sp.username)
        self.content_list = self.get_content_list(url_mood, 0)
        self.content_list += self.get_content_list(url_mood, 20)
        self.content_list += self.get_content_list(url_mood, 40)
        self.output('------------------------')
        self.output("最近的60条说说:")
        for item in self.content_list:
            content = item['content']
            if len(content) > 20:
                # Keep the listing compact: show only the first 20 characters.
                content = content[0:20] + '。。。'
            item_str = '|' + str(item['order']) + '|' + content
            self.output(item_str)
        while True:
            try:
                self.output('------------------------')
                self.output("**以下输入请全部只输入数字!按回车键结束输入!**")
                self.output("**输入Q退出本程序**")
                mood_order = int(
                    self._prompt_until_valid('请输入您要选择的说说序号:'))
                if mood_order > len(self.content_list):
                    # The post is beyond the pages already loaded: fetch the
                    # page of 20 that contains it.
                    pos = (math.ceil(mood_order / 20) - 1) * 20
                    # NOTE(review): a multiple of 20 becomes 0 here; left
                    # as-is because get_all's expectations are not visible.
                    mood_order = mood_order % 20
                    worker = threading.Thread(target=self.get_all,
                                              args=(url_mood, pos, mood_order),
                                              daemon=True)
                else:
                    selected = self.content_list[mood_order - 1]
                    worker = threading.Thread(
                        target=self.start_like_cmt_thread,
                        args=(selected['unikey'], selected['tid']),
                        daemon=True)
                # Fetch in the background while the user answers the
                # remaining questions.
                worker.start()
                raffle_type = self._prompt_until_valid(
                    '请输入抽奖的类型,1-点赞;2-评论;(其它)-我全都要! :')
                user_num = self._prompt_until_valid('请选择抽奖的用户数量:')
                # Wait for this round's worker, then for the like/comment
                # threads (presumably created by the worker) if they exist
                # and are still running.
                worker.join()
                for attr in ('like_t', 'cmt_t'):
                    sub_thread = getattr(self, attr, None)
                    if sub_thread is not None and sub_thread.is_alive():
                        sub_thread.join()
                self.type = int(raffle_type)
                self.user_num = int(user_num)
                # Build a file-system friendly name from the first 10
                # characters of the post, keeping only word characters.
                self.file_name = re.sub(
                    r'[^\w\u4e00-\u9fff]+', '',
                    self.content_list[mood_order - 1]['content'][:10])
                if not self.file_name:
                    self.file_name = str(mood_order)
                print("说说:", self.file_name)
                self.do_raffle()
                cmt_df = pd.DataFrame(self.cmt_list)
                like_df = pd.DataFrame(self.like_list)
                cmt_df.to_excel("评论-" + self.file_name + '.xlsx', index=False)
                like_df.to_excel("点赞-" + self.file_name + '.xlsx', index=False)
            except KeyboardInterrupt:
                # Ctrl+C must end the program instead of being retried.
                raise
            except BaseException as e:
                # An exception whose text is '2' ends the program with exit
                # code 2 (presumably raised when the user enters Q - verify
                # against check_input).
                if str(e) == '2':
                    raise SystemExit(2)
                else:
                    self.output('----------------------')
                    print(e)
                    self.output("未知错误,请重新尝试")
                    self.output('----------------------')

    def _prompt_until_valid(self, prompt):
        """Show ``prompt`` and read a line until ``check_input`` accepts it.

        :param prompt: text passed to ``self.output`` before each read
        :return: the raw string the user typed
        """
        while True:
            self.output(prompt)
            answer = input()
            if self.check_input(answer):
                return answer
Пример #3
0
    def clean_friend_data(self):
        """
        Flatten the per-friend detail records into a table and export it.

        Loads the friend list first when it is still empty, builds one row
        per entry of ``self.friend_detail`` (entries that cannot be parsed
        are stored as placeholder "one-way friend" rows), sorts the rows by
        the time the friend was added and writes the Excel / CSV files that
        are enabled. The resulting frame is kept in ``self.friend_df``.

        :return: True on success; False when there are no friends or when
                 any step outside the per-friend parsing fails
        """
        try:
            friend_total_num = len(self.friend_list)
            if friend_total_num == 0:
                self.load_friend_data()
                friend_total_num = len(self.friend_list)
            print("valid friend num:", friend_total_num)
            friend_list_df = pd.DataFrame(self.friend_list)
            self.friend_detail_list = []
            if not friend_total_num:
                print("该用户没有好友")
                return False

            for friend in self.friend_detail:
                try:
                    friend_uin = friend['friendUin']
                    added_at = friend['addFriendTime']
                    # Avatar of the friend-list row with the same uin.
                    same_uin = friend_list_df['uin'] == friend_uin
                    avatar = friend_list_df.loc[same_uin, 'img'].values[0]
                    nick_map = friend['nick']
                    display_name = remove_special_tag(
                        nick_map[str(friend_uin)])

                    shared = friend['common']
                    shared_friend_count = len(shared['friend'])
                    shared_groups = shared['group']
                    self.friend_detail_list.append({
                        'uin': self.username,
                        'friend_uin': friend_uin,
                        'add_friend_time': added_at,
                        'nick_name': display_name,
                        'common_friend_num': shared_friend_count,
                        'common_group_num': len(shared_groups),
                        'common_group_names': shared_groups,
                        'img': avatar,
                    })

                except BaseException:
                    # Any failure above is taken to mean a one-way friend;
                    # a placeholder row keeps the friend in the table.
                    if self.debug:
                        print("单向好友:", friend)
                    self.friend_detail_list.append({
                        'uin': 0,
                        'friend_uin': friend['friendUin'],
                        'add_friend_time': 0,
                        'nick_name': '单向好友',
                        'common_friend_num': 0,
                        'common_group_num': 0,
                        'common_group_names': '',
                        'img': '',
                    })

            friend_df = pd.DataFrame(self.friend_detail_list)
            friend_df.sort_values(by='add_friend_time', inplace=True)
            # Second time column produced by the project's time formatter.
            added_times = friend_df['add_friend_time']
            friend_df['add_friend_time2'] = added_times.apply(
                util.get_full_time_from_mktime)
            friend_df.fillna('', inplace=True)

            if self.export_excel:
                friend_df.to_excel(self.FRIEND_DETAIL_EXCEL_FILE_NAME)
            if self.export_csv:
                friend_df.to_csv(self.FRIEND_DETAIL_LIST_FILE_NAME)
            if self.debug:
                print("Finish to clean friend data...")
                print("File Name:", self.FRIEND_DETAIL_LIST_FILE_NAME)
            self.friend_df = friend_df
            return True
        except BaseException as err:
            self.format_error(err, "Failed to parse friend_info")
            return False