示例#1
0
class AgendaBolsoCrawler:
    """Scrape the presidential agenda page for a day and relay it via the bot.

    Instantiating the class immediately scrapes ``day``. To backfill a date
    range instead, call :meth:`retroativo` on an instance.
    """

    def __init__(self, day):
        """Create the browser driver and scrape the agenda for ``day``.

        Args:
            day: Date-like object supporting ``strftime`` (the day to scrape).
        """
        self.driver = WebDriverWrapper()._driver
        self.wait = WebDriverWait(self.driver, 10)

        # Daily mode. For a retroactive backfill, call retroativo() instead.
        self.scrape(day)

    def retroativo(self):
        """Scrape every day from ``start`` up to, but not including, ``end``."""
        # Set the first and last day of the backfill window here.
        start = date(2019, 1, 1)
        end = date(2019, 7, 7)
        delta = end - start
        # range(delta.days) stops one day short of `end`.
        for i in range(delta.days):
            self.scrape(start + timedelta(days=i))

    def scrape(self, day):
        """Load the agenda page for ``day`` and send its items to the chats.

        Nothing is sent when the page lists no appointments. Any failure while
        reading an appointment or while sending is reported as ``Not Found``
        and swallowed, so a backfill run continues with the next day.

        Args:
            day: Date-like object supporting ``strftime``.
        """
        items_agenda = []
        day_string = day.strftime("%Y-%m-%d")
        print('Dia: ' + day_string)
        self.driver.get(
            'https://www.gov.br/planalto/pt-br/acompanhe-o-planalto/agenda-do-presidente-da-republica/'
            + day_string)
        # Fixed pause after navigation before querying the page.
        time.sleep(5)
        compromissos = self.driver.find_elements_by_class_name(
            "item-compromisso")
        try:
            for compromisso in compromissos:
                horario = '*Horário:* ' + compromisso.find_element_by_xpath(
                    ".//time").text + '\n'
                titulo = compromisso.find_element_by_xpath(
                    ".//h4[@class='compromisso-titulo']").text + '\n'
                local = '*Local:* ' + compromisso.find_element_by_xpath(
                    ".//p[@class='compromisso-local']").text + '\n'
                items_agenda.append([horario, titulo, local])
            if items_agenda:
                self.send_to_chats(items_agenda, day_string)
        except Exception:
            # Deliberately best-effort, but no longer a bare ``except`` so
            # KeyboardInterrupt / SystemExit still stop the program.
            print('Not Found')

    @staticmethod
    def _format_message(rows, day_string):
        """Build the markdown message: a bold header followed by all rows.

        Args:
            rows: Iterable of iterables of strings; the strings of each row
                are concatenated and rows are separated by a newline.
            day_string: Day label placed in the header.

        Returns:
            The full message text.
        """
        body = '\n'.join(''.join(row) for row in rows)
        return '*Agenda Bolsonaro ' + day_string + '* \n' + body

    @staticmethod
    def _chat_ids(updates):
        """Return the distinct chat ids found in ``updates``, first-seen order.

        Updates without a ``['message']['chat']['id']`` path are skipped with
        a printed notice.
        """
        chats = []
        for update in updates:
            try:
                chats.append(update['message']['chat']['id'])
            except (KeyError, TypeError):
                print('empity message')
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        return list(dict.fromkeys(chats))

    def send_to_chats(self, rows, day_string):
        """Send the formatted agenda to every chat seen in the bot's updates.

        Args:
            rows: Agenda rows as produced by :meth:`scrape`.
            day_string: Day label used in the message header.
        """
        string_rows = self._format_message(rows, day_string)
        chats = self._chat_ids(BOT_INSTANCE.getUpdates())
        print(chats)
        print(string_rows)
        for chat in chats:
            BOT_INSTANCE.sendMessage(chat, string_rows, parse_mode='markdown')
示例#2
0
def lambda_handler(*args, **kwargs):
    """Print every Seeking Alpha market-news title that contains 'doge'.

    Positional and keyword arguments are accepted but not used.

    Returns:
        An empty string; matching titles are only printed.
    """
    example_text = ''
    sekinAl = "https://seekingalpha.com/market-news/all"

    # NOTE(review): `get` / `find_elements_by_class_name` are called on the
    # wrapper object itself here — confirm WebDriverWrapper exposes them (or
    # whether the underlying `_driver` should be used instead).
    driver = WebDriverWrapper()
    try:
        driver.get(sekinAl)
        for elem in driver.find_elements_by_class_name("title"):
            title = elem.text
            if 'doge' in title:
                print(title)
    finally:
        # Always release the browser, even if navigation or lookup fails.
        driver.close()

    return example_text
示例#3
0
def _restore_cookies(driver, path):
    """Add the cookies stored at ``path`` (one JSON object per line) to ``driver``.

    Returns:
        True when at least one cookie was added; False when the file is
        missing, unreadable, empty or not valid JSON.
    """
    try:
        with open(path) as cookies_file:
            cookies = [
                json.loads(line) for line in cookies_file if line.strip()
            ]
    except (OSError, ValueError):
        return False
    for cookie in cookies:
        driver.add_cookie(cookie)
    return bool(cookies)


def _interactive_login(driver):
    """Sign in through the login form until the URL contains 'home'.

    Prompts on stdin for a CAPTCHA, or waits for approval, depending on which
    page the site shows after the form is submitted.
    """
    # NOTE(review): credentials are hard-coded in source; consider moving them
    # to environment variables or a secret store.
    driver.find_element_by_xpath("//a[@href='/login']").click()

    username = driver.find_element_by_id("ap_email")
    username.clear()
    username.send_keys("*****@*****.**")

    password = driver.find_element_by_id("ap_password")
    password.clear()
    password.send_keys("infOaz19!")

    driver.find_element_by_id("signInSubmit").click()

    while 'home' not in driver.current_url:
        if 'approval' in driver.current_url:
            print('Aprove o login no celular.')
            # Block (up to 300 s) until the home summary widget is visible.
            WebDriverWait(driver, 300).until(
                ec.visibility_of_element_located(
                    (By.XPATH,
                     "//div[@data-assoc-eid='ac-home-month-summary']")))
        elif 'signin' in driver.current_url:
            captcha_img = driver.find_element_by_xpath(
                "//img[@alt='CAPTCHA']").get_attribute("src")
            print(captcha_img)

            captcha_input = driver.find_element_by_id("auth-captcha-guess")
            captcha = input("Digite o CAPTCHA e aperte ENTER\n")
            print(f'Usando o captcha "{captcha}"')
            captcha_input.send_keys(captcha)
            password = driver.find_element_by_id("ap_password")
            password.clear()
            # NOTE(review): this retry password differs from the one typed on
            # the first attempt (no trailing '!') — confirm which is correct.
            password.send_keys("infOaz19")
            driver.find_element_by_id("signInSubmit").click()


def _persist_cookies(driver, path):
    """Write the driver's current cookies to ``path``, one JSON object per line."""
    with open(path, "w") as cookies_file:
        for cookie in driver.get_cookies():
            cookies_file.write(json.dumps(cookie) + '\n')


def lambda_handler(*args, **kwargs):
    """Log in to the associates site and return the monthly summary figures.

    Posts a placeholder user record to the Firebase database, opens the site,
    reuses cookies from ``cookies.txt`` when available (otherwise performs an
    interactive login and saves the cookies), then reads the first four values
    of the month-summary widget. Arguments are accepted but not used.

    Returns:
        A formatted string with the four summary values.

    Raises:
        IndexError: If fewer than four summary values are on the page.
    """
    cookies_path = "cookies.txt"
    site_url = "https://associados.amazon.com.br"

    driver = WebDriverWrapper()._driver
    try:
        db = firebase.FirebaseApplication(
            'https://dash-associados-default-rtdb.firebaseio.com/', None)
        data = {"username": "******"}
        db.post('/users', data)

        # The browser must be on the site before cookies can be added for it.
        driver.get(site_url)
        if _restore_cookies(driver, cookies_path):
            # Reload so the restored session cookies are actually sent.
            driver.get(site_url)
        else:
            _interactive_login(driver)
            _persist_cookies(driver, cookies_path)

        summaries = driver.find_elements_by_xpath(
            "//div[@data-assoc-eid='ac-home-month-summary']//div[contains(@class, 'a-row')]//div[contains(@class, 'a-ws-span-last')]"
        )
        if len(summaries) < 4:
            raise IndexError(
                f'expected 4 summary values, found {len(summaries)}')
        total_sent = summaries[0].text
        total_gains = summaries[1].text
        total_ordered = summaries[2].text
        total_clicks = summaries[3].text

        return f'Produtos enviados: "{total_sent}" - Ganho: "{total_gains}" - Produtos pedidos: "{total_ordered}" - Cliques: "{total_clicks}"'
    finally:
        # Cleanup only: returning from ``finally`` would hide real errors.
        driver.close()