Пример #1
0
def _first_follower_ids(cursor):
    """Return the follower ids of the first document in *cursor* that has a
    non-None 'follower' field, or an empty list when no document has one."""
    for doc in cursor:
        followers = doc.get('follower')
        if followers is not None:
            # Only the first document carrying a 'follower' field is used.
            return [entry.get('follower_id') for entry in followers]
    return []


def extract_answerers_followers(user_id, user_id_list):
    """Return the follower ids stored for an answerer.

    :param user_id: id of the answerer to look up.
    :param user_id_list: collection of known user ids; the database is only
        queried when ``user_id`` is contained in it.
    :return: ``[]`` when ``user_id`` is not in ``user_id_list``. Otherwise the
        'follower_id' values found in ``haoyouguanxi.answerer_follower``
        (falling back to ``haoyouguanxi.answerer_followers`` when the first
        lookup yields nothing), followed by the marker string
        ``'user_exists'``.
    """
    # Unknown user: nothing to look up, so do not open a connection at all.
    if user_id not in user_id_list:
        return []

    mongo = Mongo_2()
    try:
        relations = mongo.db.haoyouguanxi
        follower_id = _first_follower_ids(
            relations.answerer_follower.find({"user_id": user_id}))
        if not follower_id:
            # Second chance: the same query against the plural-named collection.
            follower_id = _first_follower_ids(
                relations.answerer_followers.find({"user_id": user_id}))
        follower_id.append('user_exists')
        return follower_id
    finally:
        # Close the client even when a query raises.
        mongo.client.close()
Пример #2
0
    def __init__(self, fileNum):
        """Set up the initial state and then call ``self.get_voters()``.

        :param fileNum: stored unchanged on ``self.fileNum``.
        """
        self.fileNum = fileNum
        self.mongo = Mongo_2()

        # Everything below starts unset.
        self.answer_id = self.answer_type = self.answerID_list = None
        self.file = self.start = self.end = None
        self.type = self.current_proxy = None

        # Flags and accumulators.
        self.is_del = self.state = False
        self.voter_id_list = []

        # HTTP request headers; 'Cookie' and 'x-udid' start as None.
        # NOTE(review): 'textml' in the Accept value is not a valid media
        # type -- possibly meant to be 'text/html'; confirm before changing.
        request_headers = {
            'Accept':
            'textml,application/json,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language':
            'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2',
            'Host': 'www.zhihu.com',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36',
            'Referer': 'http://www.zhihu.com/',
            'Cookie': None,
            'x-udid': None,
        }
        self.headers = request_headers

        # Construction immediately triggers the work.
        self.get_voters()
Пример #3
0
def _iter_embedded_answer_ids(cursor):
    """Yield the 'answer_id' of each entry in the 'answers' list of every
    document in *cursor*, skipping documents whose 'answer_num' is 0."""
    for doc in cursor:
        if doc.get('answer_num') == 0:
            continue
        # A document without an 'answers' field contributes nothing instead
        # of raising TypeError on iteration over None.
        for answer in doc.get('answers') or ():
            yield answer.get('answer_id')


def extract_answerID():
    """Return the distinct answer ids stored in the ``question_answers`` and
    ``answer`` collections.

    :return: list of unique 'answer_id' values, in no particular order.
    """
    mongo = Mongo_2()
    try:
        unique_ids = set(_iter_embedded_answer_ids(
            mongo.db.question_answers.find(
                {}, {'answer_num': 1, 'answers': 1})))
        # Older answers live in the ``answer`` collection.
        unique_ids.update(_iter_embedded_answer_ids(
            mongo.db.answer.find({}, {'answer_num': 1, 'answers': 1})))
    finally:
        # Close the client even when a query raises.
        mongo.client.close()
    # The set has already removed duplicates.
    return list(unique_ids)
Пример #4
0
def extract_last_followers():
    """Return the 'user_id' of every document in ``followers_last``.

    :return: list with one entry per document; an entry is ``None`` when the
        document has no 'user_id' field.
    """
    mongo = Mongo_2()
    try:
        return [doc.get('user_id') for doc in mongo.db.followers_last.find({})]
    finally:
        # Close the client even when the query raises.
        mongo.client.close()
Пример #5
0
def extract_questionUrl():
    """Return the 'question_url' of every document in ``question_url``.

    :return: list of 'question_url' values, one per document.
    :raises KeyError: if a document has no 'question_url' field.
    """
    mongo = Mongo_2()
    try:
        # Collection query with a projection limited to 'question_url'.
        items = mongo.db.question_url.find({}, {"question_url": 1})
        return [item['question_url'] for item in items]
    finally:
        # Close the client even when the query raises.
        mongo.client.close()
Пример #6
0
def extract_question_followers(question_id):
    """Return all followers recorded for a question.

    :param question_id: value matched against the 'question_id' field of
        ``english_followers_new``.
    :return: the concatenated 'followers' entries of every matching document;
        documents without a 'followers' field are skipped.
    """
    mongo = Mongo_2()
    try:
        follower_id = []
        for doc in mongo.db.english_followers_new.find(
                {"question_id": question_id}):
            followers = doc.get('followers')
            if followers is not None:
                follower_id.extend(followers)
        return follower_id
    finally:
        # Close the client even when the query raises.
        mongo.client.close()
Пример #7
0
    def __init__(self, fileNum):
        """Set up the initial state and then call ``self.get_people_detail()``.

        :param fileNum: stored unchanged on ``self.fileNum``.
        """
        self.fileNum = fileNum
        self.mongo = Mongo_2()
        self.userID_list = []

        # Everything below starts unset.
        self.id = self.file = None
        self.start = self.end = None
        self.driver = None

        # Construction immediately triggers the work.
        self.get_people_detail()
Пример #8
0
def extract_answer_voters(answer_id):
    """Return all voters recorded for an answer.

    :param answer_id: value matched against the 'answer_id' field of
        ``voters_old``.
    :return: the concatenated 'voters' entries of every matching document;
        documents without a 'voters' field are skipped.
    """
    mongo = Mongo_2()
    try:
        voter_id = []
        for doc in mongo.db.voters_old.find({"answer_id": answer_id}):
            voters = doc.get('voters')
            if voters is not None:
                voter_id.extend(voters)
        return voter_id
    finally:
        # Close the client even when the query raises.
        mongo.client.close()
Пример #9
0
def extract_answer_comments(answer_id):
    """Return the 'comment_num' of the first ``comment`` document for an answer.

    :param answer_id: value matched against the 'answer_id' field.
    :return: a one-element list holding the first matching document's
        'comment_num', or ``[]`` when nothing matches or that document has no
        'comment_num'. Despite the name, the value is 'comment_num', not an id.
    """
    mongo = Mongo_2()
    try:
        comment_id = []
        for doc in mongo.db.comment.find({"answer_id": answer_id}):
            comment_num = doc.get('comment_num')
            if comment_num is not None:
                comment_id.append(comment_num)
            # Only the first matching document is consulted.
            break
        return comment_id
    finally:
        # Close the client even when the query raises.
        mongo.client.close()
Пример #10
0
def extract_question_answers(question_id):
    """Return the 'answer_num' of the first ``answer`` document for a question.

    :param question_id: value matched against the 'question_id' field.
    :return: a one-element list holding the first matching document's
        'answer_num', or ``[]`` when nothing matches or that document has no
        'answer_num'. Despite the name, the value is 'answer_num', not an id.
    """
    mongo = Mongo_2()
    try:
        answer_id = []
        for doc in mongo.db.answer.find({"question_id": question_id}):
            answer_num = doc.get('answer_num')
            if answer_num is not None:
                answer_id.append(answer_num)
            # Only the first matching document is consulted.
            break
        return answer_id
    finally:
        # Close the client even when the query raises.
        mongo.client.close()
Пример #11
0
def extract_last_voters():
    """Return the 'user_id' of every document in ``voters_last``.

    :return: list with one entry per document; an entry is ``None`` when the
        document has no 'user_id' field.
    """
    mongo = Mongo_2()
    try:
        return [doc.get('user_id') for doc in mongo.db.voters_last.find({})]
    finally:
        # Close the client even when the query raises.
        mongo.client.close()


# if __name__ == "__main__":
#     print extract_answer_voters(255313072)
Пример #12
0
def extract_voters_following(user_id, user_id_list):
    """Return the ids of the users a voter is following.

    :param user_id: id of the voter to look up.
    :param user_id_list: collection of known user ids; the database is only
        queried when ``user_id`` is contained in it.
    :return: ``[]`` when ``user_id`` is not in ``user_id_list``. Otherwise the
        'following_id' values of the first ``haoyouguanxi.voter_following``
        document that has a 'following' field, followed by the marker string
        ``'user_exists'``.
    """
    # Unknown user: nothing to look up, so do not open a connection at all.
    if user_id not in user_id_list:
        return []

    mongo = Mongo_2()
    try:
        following_id = []
        cursor = mongo.db.haoyouguanxi.voter_following.find(
            {"user_id": user_id})
        for doc in cursor:
            following = doc.get('following')
            if following is not None:
                following_id.extend(
                    entry.get('following_id') for entry in following)
                # Only the first document carrying 'following' is used.
                break
        following_id.append('user_exists')
        return following_id
    finally:
        # Close the client even when the query raises.
        mongo.client.close()
Пример #13
0
    def __init__(self, fileNum):
        """Set up the initial state and then call ``self.get_answers()``.

        :param fileNum: stored unchanged on ``self.fileNum``.
        """
        self.fileNum = fileNum
        self.url_domain = 'https://www.zhihu.com'

        # Question identity and URLs start unset.
        self.question_url = self.id = None
        self.log_url = self.url = None

        # Remaining state starts unset.
        self.content = self.type = self.question_type = None
        self.file = self.start = self.end = None
        self.questionUrl_list = self.current_proxy = None

        # Flags and accumulators.
        self.is_del = self.state = False
        self.topic_list = []
        self.log = []
        self.answer_id_list = []

        self.mongo = Mongo_2()

        # HTTP request headers; 'Cookie' and 'x-udid' start as None.
        # NOTE(review): 'textml' in the Accept value is not a valid media
        # type -- possibly meant to be 'text/html'; confirm before changing.
        request_headers = {
            'Accept':
            'textml,application/json,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language':
            'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2',
            'Host': 'www.zhihu.com',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36',
            'Referer': 'http://www.zhihu.com/',
            'Cookie': None,
            'x-udid': None,
        }
        self.headers = request_headers

        # Construction immediately triggers the work.
        self.get_answers()
Пример #14
0
 def __init__(self):
     """Create the Mongo_2 handle and start with an empty user list."""
     self.mongo = Mongo_2()
     self.user_list = []
Пример #15
0
    def __init__(self):
        """Create the Mongo_2 handle used by this instance."""
        self.mongo = Mongo_2()
Пример #16
0
            # self.mongo.db.question_followers_userDetail_2222.insert(detail)
            # self.mongo.db.voters_userDetail_2222.insert(detail)
            # self.mongo.db.commenters_userDetail.insert(detail)
            self.delLogger(logger)
            self.mongo.client.close()

    # Close and remove the logger's handlers
    def delLogger(self, myLogger):
        """Close and detach every handler currently attached to *myLogger*.

        :param myLogger: a ``logging.Logger`` (or any object exposing
            ``handlers`` and ``removeHandler``).
        """
        # Iterate over a snapshot: removeHandler() mutates myLogger.handlers,
        # and removing from the live list while iterating it skips every
        # other handler, leaving half of them attached.
        for handler in list(myLogger.handlers):
            handler.close()
            myLogger.removeHandler(handler)

if __name__ == '__main__':

    # Collect the 'user_id' of every document in the ``answerers`` collection.
    mongo = Mongo_2()
    try:
        userID_list = [item.get('user_id') for item in mongo.db.answerers.find()]
    finally:
        # Close the client even when the query raises.
        mongo.client.close()

    ud = UserDetail()

    # NOTE(review): `driver` is not referenced again in this block; presumably
    # get_people_detail reads it as a module-level global -- confirm before
    # removing or renaming it.
    driver = webdriver.PhantomJS(executable_path=r'D:\PhantomJS\phantomjs-2.1.1-windows\bin\phantomjs.exe')

    # Digits-only timestamp (YYYYMMDDHHMMSS) shared by every call of this run.
    dt = datetime.datetime.now().strftime('%Y%m%d%H%M%S')

    # Process at most the first 2367 users; slicing keeps that cap while
    # avoiding an IndexError when the collection holds fewer documents.
    for i, user_id in enumerate(userID_list[:2367]):
        ud.get_people_detail(user_id, i, dt)
        #time.sleep(1)
Пример #17
0
 def __init__(self):
     """Create the Mongo_2 handle and start with empty follower lists."""
     self.mongo = Mongo_2()
     # Two separate accumulators, both initially empty.
     self.followers_list_old = []
     self.followers_list = []
Пример #18
0
def extract_voters_info():
    """Return a dict keyed by the 'user_id' of every ``voters_info_1111`` document.

    :return: dict mapping each 'user_id' to the empty string ``""``;
        duplicate ids collapse into a single key.
    """
    mongo = Mongo_2()
    try:
        cursor = mongo.db.voters_info_1111.find({}, {"user_id": 1})
        return {doc.get('user_id'): "" for doc in cursor}
    finally:
        # Close the client even when the query raises.
        mongo.client.close()