Пример #1
0
def _parse_stat_count(text, label):
    """Return the integer left in *text* after removing *label*, 's' and ','.

    Raises ValueError (from ``int``) when the remainder is not a plain
    integer.
    """
    return int(replace_all(text, {label: '', 's': '', ',': ''}).strip())


def refresh_asked_questions_stats():
    """Scrape and store answer, follower and view counts for asked questions.

    For every ``QuoraQuestionAccountDetails`` row flagged ``asked``, the
    question page and its ``/log`` page are loaded in the browser, the counts
    are parsed from the rendered HTML, and the ``QuoraAskedQuestionStats`` row
    for (question, today) is created or updated. Everything is committed once
    at the end.

    Returns:
        An empty dict.
    """
    session = get_new_session()
    questions_details = session.query(QuoraQuestionAccountDetails).filter(
        QuoraQuestionAccountDetails.asked == True).all()
    driver = get_driver()
    # try/finally so the browser is shut down even when a page load or a
    # count parse raises part-way through the run.
    try:
        for question_detail in questions_details:
            question = question_detail.question
            # Read the date once so the lookup and a newly created row
            # cannot disagree.
            today = datetime.now().date()

            driver.get(question.question_url)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            qaqs = session.query(QuoraAskedQuestionStats).filter(
                QuoraAskedQuestionStats.question == question
            ).filter(QuoraAskedQuestionStats.recorded_on == today).first()
            if qaqs is None:
                qaqs = QuoraAskedQuestionStats()
                qaqs.question = question
                qaqs.recorded_on = today

            # GET NUMBER OF ANSWERS
            answer_divs = soup.findAll(
                'div',
                attrs={
                    'class':
                    'q-text qu-medium qu-fontSize--regular qu-color--gray_dark qu-passColorToLinks'
                })
            for answer_div in answer_divs:
                text = answer_div.get_text()
                if "Answer" in text:
                    qaqs.answer_count = _parse_stat_count(text, 'Answer')

            driver.get(question.question_url + '/log')
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            # IDENTIFIES STATS PER QUESTION
            stat_divs = soup.findAll(
                'div',
                attrs={
                    'class':
                    'q-flex qu-py--tiny qu-px--medium qu-alignItems--center'
                })
            for stat_div in stat_divs:
                text = stat_div.get_text()
                if "Public Follower" in text:
                    qaqs.follower_count = _parse_stat_count(
                        text, 'Public Follower')
                if "View" in text:
                    qaqs.view_count = _parse_stat_count(text, 'View')
            session.add(qaqs)
    finally:
        driver.quit()

    session.commit()
    return {}
Пример #2
0
def refresh_data(time, put_todays_date):
    """Search Quora for every division keyword and store unseen questions.

    Args:
        time: String inserted as the ``time`` query parameter of the search
            URL and also passed to ``get_time_interval`` to select the
            already-stored questions used for de-duplication. (The name
            shadows the ``time`` module inside this function.)
        put_todays_date: Forwarded to ``fill_dates``; see that function.

    Returns:
        An empty dict.
    """
    session = get_new_session()
    divisions = session.query(Division).order_by(asc(Division.id))
    keywords = session.query(QuoraKeyword).all()
    question_list = []

    # Site-relative paths of the questions that are already stored.
    url_set = set()
    for row in session.query(QuoraQuestion.question_url).filter(
            QuoraQuestion.asked_on > get_time_interval(time)):
        known_url = row.question_url
        # URLs are assigned as encoded bytes further down; decode first so
        # str() does not produce a "b'...'" representation.
        if isinstance(known_url, bytes):
            known_url = known_url.decode(encoding)
        # Plain str.replace: the previous call handed replace_all a set
        # literal, unlike every dict-based call elsewhere in this file.
        url_set.add(str(known_url).replace("https://www.quora.com", ""))

    driver = get_driver()
    # try/finally so the browser is closed even if a search page fails.
    try:
        for division in divisions:
            division_keywords = [
                keyword for keyword in keywords
                if keyword.division == division
            ]
            for keyword in division_keywords:
                url = ("https://www.quora.com/search?q=" +
                       keyword.keyword.replace(" ", "+") + "&time=" + time +
                       "&type=question")
                driver.get(url)
                scroll_to_bottom(driver, LOAD_TIME)
                soup = BeautifulSoup(driver.page_source, 'html.parser')

                # GET EACH QUESTION LINK & QUESTION TEXT
                for link in soup.findAll('a',
                                         attrs={'class': 'question_link'}):
                    question_link = link['href']
                    # UNANSWERED QUESTIONS WILL REDIRECT TO ORIGINAL URL ANYWAY
                    if '/unanswered' in question_link:
                        question_link = question_link.replace(
                            '/unanswered', '', 1)
                    if str(question_link) in url_set:
                        continue
                    url_set.add(question_link)
                    question = QuoraQuestion()
                    question.question_url = (
                        'https://www.quora.com' +
                        question_link).encode(encoding)
                    question.question_text = link.find(
                        'span', attrs={
                            'class': 'ui_qtext_rendered_qtext'
                        }).text.encode(encoding)
                    question.division_id = division.id
                    question_list.append(question)
    finally:
        driver.quit()

    session.bulk_save_objects(
        fill_dates(question_list, put_todays_date, session))
    session.commit()

    return {}
Пример #3
0
def refresh_requested_questions():
    """Flag the questions each account has been requested to answer.

    For every ``QuoraAccount`` a browser is started, the account is logged in
    and the answer-requests page is scraped. Each linked question is created
    if it is not stored yet (in the 'Vidyalankar' division), and the
    (account, question) details row is created if missing and marked
    ``requested``. All changes are committed once at the end.

    Returns:
        An empty dict.
    """
    session = get_new_session()
    accounts = session.query(QuoraAccount).all()
    default_division = session.query(Division).filter(
        Division.division == 'Vidyalankar').first()
    for account in accounts:
        driver = get_driver()
        # Only the page source is needed afterwards, so the browser is
        # released right after scraping; try/finally also covers failures.
        try:
            login_to_account(driver, account)
            driver.get("https://www.quora.com/answer/requests")
            scroll_to_bottom(driver, LOAD_TIME)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
        finally:
            driver.quit()

        # GET REQUESTED QUESTIONS
        request_links = soup.findAll(
            'a',
            attrs={
                'class':
                'q-box qu-cursor--pointer qu-hover--textDecoration--underline'
            })
        for request_link in request_links:
            href = request_link.get('href')
            if not href:
                # An anchor without a target cannot identify a question.
                continue
            # NOTE(review): a href that starts with '/' produces a double
            # slash after the host here -- confirm the href format.
            question_url = ("https://www.quora.com/" +
                            href.replace('unanswered/', '', 1)
                            ).encode(encoding)
            question = session.query(QuoraQuestion).filter(
                QuoraQuestion.question_url == question_url).first()
            if question is None:
                question = QuoraQuestion()
                question.question_url = question_url
                question.question_text = request_link.get_text().encode(
                    encoding)
                question.division = default_division
                # DATE DOES NOT MATTER FOR THIS QUESTION
                question.asked_on = datetime.now().date()
                session.add(question)
            qqad = session.query(QuoraQuestionAccountDetails).filter(
                QuoraQuestionAccountDetails.account == account).filter(
                    QuoraQuestionAccountDetails.question == question).first()
            if qqad is None:
                qqad = QuoraQuestionAccountDetails()
                qqad.account = account
                qqad.question = question
            qqad.requested = True
            session.add(qqad)
    session.commit()
    return {}
Пример #4
0
def fill_dates(question_list, put_todays_date, session):
    """Set ``asked_on`` on every question and return the same list.

    Args:
        question_list: Objects exposing ``question_url`` (str or encoded
            bytes) and a writable ``asked_on`` attribute. Mutated in place.
        put_todays_date: When truthy, every question gets today's date and no
            browser is started. Otherwise each question's ``/log`` page is
            scraped for its "Question added by" entry.
        session: Unused; kept for call compatibility.

    Returns:
        ``question_list`` itself.
    """
    if put_todays_date:
        fixed_asked_on = datetime.now().date()
        for question in question_list:
            question.asked_on = fixed_asked_on
        return question_list

    if not question_list:
        # Nothing to look up, so do not start a browser at all.
        return question_list

    driver = get_driver()
    # try/finally so a failed page load or date parse cannot leak the browser.
    try:
        for question in question_list:
            link = question.question_url
            if not isinstance(link, str):
                link = link.decode(encoding)

            driver.get(link + '/log')
            scroll_to_bottom(driver, LOAD_TIME)
            soup = BeautifulSoup(driver.page_source, 'html.parser')

            # GET DATE
            for each_part in soup.select('div[class*="pagedlist_item"]'):
                date_text = each_part.get_text()
                if "Question added by" not in date_text:
                    continue
                # FOR QUESTIONS ASKED LESS THAN 24 HOURS AGO
                # NOTE(review): 'ago' is matched anywhere in the entry text,
                # including inside a user name -- confirm this is intended.
                if 'ago' in date_text:
                    question.asked_on = datetime.now().date()
                    break
                if 'yesterday' in date_text:
                    question.asked_on = (datetime.now() -
                                         relativedelta(days=1)).date()
                    break
                # FOR QUESTIONS BEFORE
                date_text = date_text[-12:].strip()
                if ',' in date_text:
                    question.asked_on = datetime.strptime(
                        date_text, '%b %d, %Y').date()
                else:
                    # NOTE(review): '%b %y' reads the trailing number as a
                    # two-digit year; confirm the page does not show
                    # "<month> <day>" here.
                    date_text = date_text[-6:].strip()
                    question.asked_on = datetime.strptime(
                        date_text, '%b %y').date()
    finally:
        driver.quit()
    return question_list
Пример #5
0
def refresh_accounts_stats(time_period):
    """Scrape per-day views, upvotes and shares for every Quora account.

    For each ``QuoraAccount`` a browser is started, the account is logged in
    and the stats page is opened with the period option named "1" selected.
    For every stat whose headline number is non-zero, the graph series is
    read and merged into ``QuoraAccountStats`` rows keyed by
    (account, recorded_on). All changes are committed once at the end.

    Args:
        time_period: Currently unused; the period option named "1" is always
            selected.

    Returns:
        An empty dict.
    """
    # Stat name, in tab order, paired with the model attribute it fills.
    stat_columns = (('Views', 'view_count'), ('Upvotes', 'upvote_count'),
                    ('Shares', 'share_count'))

    session = get_new_session()
    accounts = session.query(QuoraAccount).all()
    for account in accounts:
        stats_arrays_object = {}
        quora_account_stats_array = []

        driver = get_driver()
        # try/finally so a failed login/click/parse cannot leak the browser.
        try:
            login_to_account(driver, account)
            driver.get('https://quora.com/stats')
            period_menu = driver.find_element_by_class_name("menu_link")
            period_menu.click()
            time.sleep(LOAD_TIME)
            last_week = driver.find_element_by_name("1")
            last_week.click()
            time.sleep(LOAD_TIME)

            soup = BeautifulSoup(driver.page_source, 'html.parser')
            # Headline numbers are read only when a stat tab is present.
            stats_count = []
            if soup.find("a", attrs={"class": "heads_up_item"}) is not None:
                stats_count = [
                    number.get_text(strip=True)
                    for number in soup.findAll("p",
                                               attrs={"class": "big_num"})
                ]

            xaxis_dates = []
            for tab_index, (stat_name, _) in enumerate(stat_columns):
                # Tolerate thousands separators in the headline number.
                if int(stats_count[tab_index].replace(',', '')) == 0:
                    continue
                if tab_index > 0:
                    # Select this stat's own tab right before reading it.
                    # The click used to happen after reading the previous
                    # stat and was skipped when that stat was zero, so the
                    # next series was read from the wrong graph.
                    driver.find_elements_by_class_name(
                        "heads_up_item")[tab_index].click()
                    time.sleep(LOAD_TIME)
                    soup = BeautifulSoup(driver.page_source, 'html.parser')
                if not xaxis_dates:
                    xaxis_dates = get_qas_xaxis_dates(soup)
                    for date_object in xaxis_dates:
                        qas = QuoraAccountStats()
                        qas.recorded_on = date_object
                        qas.account = account
                        quora_account_stats_array.append(qas)

                stats_arrays_object[stat_name] = get_qas_graph_data(soup)
        finally:
            driver.quit()

        for count, qas_element in enumerate(quora_account_stats_array):
            for stat_name, column in stat_columns:
                series = stats_arrays_object.get(stat_name)
                # Index guard: a series shorter than the date axis is
                # skipped instead of raising IndexError.
                if series is not None and count < len(series):
                    setattr(qas_element, column, series[count])

            saved_element = session.query(QuoraAccountStats).filter(
                QuoraAccountStats.account == qas_element.account).filter(
                    QuoraAccountStats.recorded_on ==
                    qas_element.recorded_on).first()
            if saved_element is None:
                session.add(qas_element)
                continue
            # Overwrite only the values that were scraped in this run.
            for _, column in stat_columns:
                scraped_value = getattr(qas_element, column)
                if scraped_value is not None:
                    setattr(saved_element, column, scraped_value)

    session.commit()
    return {}
Пример #6
0
def _mark_newly_answered(session, account, soup, cutoff):
    """Flag stored questions the account answered on or after *cutoff*.

    Walks the answer boxes in page order and stops at the first answer whose
    date is older than *cutoff*.
    """
    # LOOP IDENTIFIES CLASS OF EVERY QUESTION
    for answer_box in soup.findAll(
            'div', attrs={'class': 'q-box qu-pt--medium qu-pb--medium'}):
        # GET EACH QUESTION DATE
        date_divs = answer_box.findAll(
            'div',
            attrs={
                'class':
                'q-text qu-color--gray qu-fontSize--small qu-passColorToLinks qu-truncateLines--1'
            })
        for date_div in date_divs:
            date_string = replace_all(date_div.getText(), {
                "Answered": "",
                "Updated": ""
            })
            date_of_answer = datetime.strptime(date_string.strip(),
                                               '%B %d, %Y')
            if date_of_answer < cutoff:
                return

            # GET ALL QUESTIONS NEWLY ANSWERED
            question_anchors = answer_box.findAll(
                'a',
                attrs={
                    'class':
                    'q-box qu-cursor--pointer qu-hover--textDecoration--underline'
                })
            for anchor in question_anchors:
                href = anchor.get('href')
                if not href:
                    # An anchor without a target cannot identify a question.
                    continue
                question_link = "https://www.quora.com" + href
                # SAVE QUESTION AS ANSWERED IN DB (TO DO)
                question = session.query(QuoraQuestion).filter(
                    QuoraQuestion.question_url == question_link).first()
                if question is None:
                    continue
                qqad = session.query(QuoraQuestionAccountDetails).filter(
                    QuoraQuestionAccountDetails.account == account).filter(
                        QuoraQuestionAccountDetails.question ==
                        question).first()
                if qqad is None:
                    qqad = QuoraQuestionAccountDetails()
                    qqad.account = account
                    qqad.question = question
                qqad.answered = True
                session.add(qqad)


def refresh_accounts_data():
    """Refresh answered-question flags and follower counts for all accounts.

    Each account's profile page (``account.link``) is loaded once. Answers
    dated no earlier than one day before the last recorded execution time are
    matched against stored questions and flagged as answered; the text of the
    fifth 'q-box qu-display--flex' element is stored as today's
    ``total_followers``. Finally the execution log's time is set to now and
    everything is committed.

    Returns:
        An empty dict.
    """
    session = get_new_session()
    script = session.query(Script).filter(
        Script.name == 'Refresh_Quora_Accounts_Data').first()
    execution_log = session.query(ExecutionLog).filter(
        ExecutionLog.script_id == script.id).first()
    accounts = session.query(QuoraAccount).all()
    # TAKING ONE EXTRA DAY BECAUSE QUESTIONS CAN BE ASKED ON DIFFERENT TIMES
    # ON THE SAME DAY (invariant for the whole run, so computed once).
    cutoff = execution_log.execution_time - relativedelta(days=1)

    driver = get_driver()
    # try/finally so the browser is closed even when scraping fails.
    try:
        for account in accounts:
            driver.get(account.link)
            scroll_to_bottom(driver, LOAD_TIME)
            soup = BeautifulSoup(driver.page_source, 'html.parser')

            _mark_newly_answered(session, account, soup, cutoff)

            # GET FOLLOWERS COUNT
            flex_boxes = soup.findAll('div',
                                      attrs={'class':
                                             'q-box qu-display--flex'})
            if len(flex_boxes) <= 4:
                continue
            # NOTE(review): the value is stored as the remaining text, not
            # stripped or converted to int -- confirm the column accepts it.
            follower_count = replace_all(flex_boxes[4].getText(), {
                "Follower": "",
                "s": ""
            })
            today = datetime.now().date()
            follower_count_object = session.query(QuoraAccountStats).filter(
                QuoraAccountStats.recorded_on == today).filter(
                    QuoraAccountStats.account_id == account.id).first()
            if follower_count_object is None:
                follower_count_object = QuoraAccountStats()
                follower_count_object.account = account
                follower_count_object.recorded_on = today
            follower_count_object.total_followers = follower_count
            session.add(follower_count_object)
    finally:
        driver.quit()

    # REFRESH LAST EXECUTED DATE
    execution_log.execution_time = datetime.now()
    session.add(execution_log)
    session.commit()
    return {}