def scroll_quinto_andar_page(x_request_id: str, div_number_row: int,
                             driver) -> None:
    """
    Bring a given row of the Quinto Andar listing page into view.

    The row is located by an absolute XPath whose last index is
    ``div_number_row + 2``; the pointer is then moved onto that element.

    Parameters:
            x_request_id: Unique id forwarded to the error handler.
            div_number_row: Number of the block in row in the page.
            driver: Browser driver used to find the element and to
                build the action chain.

    Returns:
            None. ElementClickInterceptedException and AttributeError
            are handed to ``error_handler`` instead of propagating
            from here.
    """
    try:
        target_row = driver.find_element_by_xpath(
            f"/html/body/div[1]/main/section[2]/div[2]/div/div[1]/div[{div_number_row+2}]"
        )
        # Pause (project sleep helper) between locating and moving.
        sleep(number=3)
        if not target_row:
            return
        ActionChains(driver).move_to_element(target_row).perform()
    except (ElementClickInterceptedException, AttributeError) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
Пример #2
0
def scraper_flow(x_request_id: str, driver: any):
    """
    Drive one full run of the QuintoAndar scraper.

    Records the start time, logs it, starts ``recursive_scraper_logic``
    with the initial row, column and limit values read from the
    ``quinto_andar`` settings mapping, then pauses and logs completion.

    Parameters:
        x_request_id: Unique id attached to every log entry.
        driver: Google Chrome instance

    Returns:
        None. WebDriverException and ElementNotInteractableException
        are handed to ``error_handler``.
    """
    try:
        timeout_start = time.time()
        start_message = f"Initiating the flow of scraper. Time: {timeout_start}"
        send_log(x_request_id=x_request_id, message=start_message)

        # Starting coordinates and counter come from the settings mapping.
        initial_state = {
            "div_number_row": quinto_andar["div_number_row_initiator"],
            "div_number_column": quinto_andar["div_number_column_initiator"],
            "limit_scraper": quinto_andar["limit_scraper"],
        }
        recursive_scraper_logic(
            x_request_id=x_request_id,
            timeout_start=timeout_start,
            driver=driver,
            **initial_state,
        )

        sleep(10)
        send_log(
            x_request_id=x_request_id,
            message="Finished the flow of scraper.",
        )
    except (WebDriverException, ElementNotInteractableException) as exception:
        error_handler(
            x_request_id=x_request_id,
            _msg="Exception occurred on scraper_flow",
            exception=exception,
        )
def resident_localization_data(x_request_id: str, driver) -> list:
    """
    Read the address line of a residence page and split it on commas.

    Parameters:
            x_request_id: unique id
            driver: google chrome instance

    Returns:
        The element text split on "," (a list of strings), or None when
        the text is empty. AttributeError and NoSuchElementException are
        handed to ``error_handler``; no value is returned in that case.
    """
    send_log(
        x_request_id=x_request_id,
        message="Searching for address of residence...",
    )
    sleep(number=7)
    try:
        address_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[2]/div[2]/p"
        )
        if not address_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found information about address...",
        )

        address_text = address_element.text
        if address_text:
            return address_text.split(",")
        return None
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
Пример #4
0
def residence_size(x_request_id: str, driver) -> int:
    """
    Extract the size of the residence from its detail page.

    Parameters:
            x_request_id: unique id
            driver: google chrome instance

    Returns:
        int: the first run of digits found in the element text, or 0
        when ``verification_string_has_digit`` reports no digit.
        AttributeError and NoSuchElementException are handed to
        ``error_handler``; no value is returned in that case.
    """
    send_log(
        x_request_id=x_request_id,
        message="Searching for the number of bedrooms...",
    )
    sleep(number=2)
    try:
        size_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[3]/div/div[1]/div/div/span"
        )
        if not size_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found information about bedrooms...",
        )

        size_text = size_element.text
        # Only parse when the helper confirms a digit is present.
        has_digit = verification_string_has_digit(x_request_id=x_request_id,
                                                  text=size_text)
        if not has_digit:
            return 0
        digit_runs = re.findall(r"\d+", size_text)
        return int(digit_runs[0])
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
Пример #5
0
def get_residence_id(x_request_id: str, driver: any) -> int:
    """
    Extract the numeric id of the residence from the breadcrumb link.

    Parameters:
            x_request_id: unique id
            driver: google chrome instance

    Returns:
        int: the first run of digits in the link text, or 0 when
        ``verification_string_has_digit`` reports no digit.
        AttributeError and NoSuchElementException are handed to
        ``error_handler``; no value is returned in that case.
    """
    send_log(
        x_request_id=x_request_id,
        message="Searching for the residence id...",
    )
    sleep(number=2)
    try:
        id_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/nav/ol/li[5]/a")
        if not id_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found id of residence...",
        )

        id_text = id_element.text
        # Only parse when the helper confirms a digit is present.
        has_digit = verification_string_has_digit(x_request_id=x_request_id,
                                                  text=id_text)
        if not has_digit:
            return 0
        digit_runs = re.findall(r"\d+", id_text)
        return int(digit_runs[0])
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
def get_furniture_flag(x_request_id: str, driver) -> bool:
    """
    Tell whether the residence is advertised as furnished.

    Parameters:
            x_request_id: unique id
            driver: google chrome instance

    Returns:
        bool: False when the lower-cased element text contains "sem",
        True otherwise. AttributeError and NoSuchElementException are
        handed to ``error_handler``; no value is returned in that case.
    """
    send_log(
        x_request_id=x_request_id,
        message="Searching for a furniture flag...",
    )
    sleep(number=2)
    try:
        furniture_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[3]/div/div[7]/div/div/span"
        )
        if not furniture_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found information about furniture in the residence...",
        )
        furniture_text = furniture_element.text.lower()
        return "sem" not in furniture_text
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
Пример #7
0
def number_of_rooms(x_request_id: str, driver) -> int:
    """
    Extract the number of rooms from the residence detail page.

    Parameters:
            x_request_id: unique id
            driver: google chrome instance

    Returns:
        int: the first run of digits in the element text, or 0 when
        ``verification_string_has_digit`` reports no digit.
        AttributeError and NoSuchElementException are handed to
        ``error_handler``; no value is returned in that case.
    """
    send_log(
        x_request_id=x_request_id,
        message="Searching for number of rooms...",
    )
    sleep(number=2)
    try:
        rooms_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[3]/div/div[2]/div/div"
        )
        if not rooms_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found information about number of rooms...",
        )

        rooms_text = rooms_element.text
        # Only parse when the helper confirms a digit is present.
        has_digit = verification_string_has_digit(x_request_id=x_request_id,
                                                  text=rooms_text)
        if not has_digit:
            return 0
        digit_runs = re.findall(r"\d+", rooms_text)
        return int(digit_runs[0])
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
def pet_flag(x_request_id: str, driver) -> bool:
    """
    Tell whether the residence listing allows pets.

    Parameters:
            x_request_id: unique id
            driver: google chrome instance

    Returns:
        bool: False when the element text contains "Não" or "Nao",
        True otherwise. AttributeError and NoSuchElementException are
        handed to ``error_handler``; no value is returned in that case.
    """
    send_log(x_request_id=x_request_id, message="Searching for pet flag...")
    sleep(number=2)
    try:
        pet_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[3]/div/div[6]/div/div/span"
        )
        if not pet_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found information about pet flag...",
        )

        pet_flag_text = pet_element.text
        send_log(
            x_request_id=x_request_id,
            message=f"Pet flag informatio is {pet_flag_text}",
        )

        # Pets are allowed unless the text carries a negation, with or
        # without the accent.
        return "Não" not in pet_flag_text and "Nao" not in pet_flag_text
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
Пример #9
0
def get_metro_flag(x_request_id: str, driver) -> bool:
    """
    Tell whether the listing reports a subway station close to the residence.

    Parameters:
            x_request_id: unique id
            driver: google chrome instance

    Returns:
        bool: False when the element text contains the negation "Não"
        (or its unaccented form "Nao"), True otherwise.
        AttributeError and NoSuchElementException are handed to
        ``error_handler``; no value is returned in that case.
    """
    send_log(x_request_id=x_request_id, message="Searching for subway flag...")
    sleep(number=2)
    try:
        metro_flag_data = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[3]/div/div[8]/div/div/span"
        )
        if metro_flag_data:
            send_log(
                x_request_id=x_request_id,
                message="Found information about subway...",
            )

            metro_flag_text = metro_flag_data.text

            # A membership test is used instead of bool(str.find(...)):
            # find() returns -1 (truthy) on a miss and 0 (falsy) only
            # when the match is at index 0, so a negation appearing
            # later in the text was reported as True. The unaccented
            # spelling is checked too, matching pet_flag.
            return ("Não" not in metro_flag_text
                    and "Nao" not in metro_flag_text)
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
Пример #10
0
def get_type_residence(x_request_id: str, driver) -> str:
    """
    Classify the residence as a house or an apartment from its heading.

    Parameters:
            x_request_id: unique id
            driver: google chrome instance

    Returns:
        str: "house" when the lower-cased heading contains "casa",
        "apartment" otherwise. AttributeError and NoSuchElementException
        are handed to ``error_handler``; no value is returned in that
        case.
    """
    send_log(
        x_request_id=x_request_id,
        message="Searching for the type of residence...",
    )
    sleep(number=2)
    try:
        heading_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[2]/div[1]/h1"
        )
        if not heading_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found the type of residence...",
        )

        heading_text = heading_element.text.lower()
        return "house" if "casa" in heading_text else "apartment"
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
Пример #11
0
def get_rent_values(x_request_id: str, driver) -> dict:
    """
    Collect the rent breakdown shown on the residence detail page.

    Parameters:
            x_request_id: unique id
            driver: google chrome instance

    Returns:
        dict keyed by "rent_without_taxes", "condominium_tax",
        "house_tax", "fire_insurance", "service_tax" and
        "total_rent_value". When exactly six numbers are found in the
        element text, each key holds the matched number as a string, in
        page order. Otherwise every key keeps its placeholder, the
        ``int`` type object. Nothing is returned when the element text
        is empty. AttributeError and NoSuchElementException are handed
        to ``error_handler``.
    """
    send_log(
        x_request_id=x_request_id,
        message="Searching for a values of the rent...",
    )
    try:
        sleep(number=2)
        rent_list_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[2]/section/div/ul")
        if not rent_list_element:
            return None

        send_log(x_request_id=x_request_id, message="Found the values...")

        # Key order matches the order the amounts appear on the page.
        field_names = (
            "rent_without_taxes",
            "condominium_tax",
            "house_tax",
            "fire_insurance",
            "service_tax",
            "total_rent_value",
        )
        rent_values_dict = dict.fromkeys(field_names, int)

        raw_text = rent_list_element.text
        if not raw_text:
            return None

        # "Incluso" entries carry no number, so they are counted as 0.
        normalized_text = raw_text.replace("Incluso", "0")
        amounts = re.findall(r"(?<![.,])\d+[,.]{0,1}\d*", normalized_text)

        # Fill the dict only when all six expected amounts were found.
        if len(amounts) == 6:
            rent_values_dict.update(zip(field_names, amounts))

        return rent_values_dict
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
Пример #12
0
def main(driver: any, queue: any) -> None:
    """
    Poll the queue forever and run the scraper for each message received.

    An empty poll is logged, followed by an 1800-second pause and a call
    to ``dealing_with_empty_queue``. Each received message is passed to
    ``request_handler`` to obtain its request id, processed by
    ``consumer_message_handler`` and then deleted.

    Parameters:
        driver: any
        queue: any

    Returns:
        None. An AttributeError ends the loop and is handed to
        ``error_handler``.
    """
    try:
        while True:
            messages = receive_messages(queue=queue, max_number=1, wait_time=0)

            if len(messages) == 0:
                send_log(
                    x_request_id="",
                    message="QUEUE with 0 messages, going to send default event in 30 minutes...",
                )
                sleep(number=1800)
                dealing_with_empty_queue(queue=queue)
                continue

            for message in messages:
                body = message.body
                x_request_id = request_handler(message=body)
                send_log(
                    x_request_id=x_request_id,
                    message="Receive message going to start scraper flow...",
                )
                consumer_message_handler(
                    message=body,
                    x_request_id=x_request_id,
                    driver=driver,
                )
                # Delete only after the handler has returned.
                delete_message(x_request_id=x_request_id, message=message)
    except AttributeError as exception:
        error_handler(exception=exception)
def recursive_scraper_logic(
    x_request_id: str,
    div_number_row: int,
    div_number_column: int,
    limit_scraper: int,
    timeout_start,
    driver,
):
    """
    Scrape one residence block, then recurse onto the next one.

    Each call pauses, checks the 900-second budget measured from
    ``timeout_start`` and, while inside it, looks up the link of the
    block at (row, column). When a link is found, the residence is
    opened in a new tab, its data is collected and stored, and the tab
    is closed. The next row/column pair then comes from
    ``recursive_column_row_logic`` and the function calls itself with
    ``limit_scraper`` increased by one.

    Parameters:
        x_request_id: Unique id.
        div_number_row: Number of the block in row in the page
        div_number_column: Number of the block in column in page
        limit_scraper: Counter passed to ``recursive_column_row_logic``
            and incremented on every call.
        timeout_start: The time that scraper begin
        driver: Google Chrome instance

    Returns:
        None
    """
    timeout = 900

    sleep(15)

    # Stop recursing once the 15-minute budget is used up.
    if time.time() >= timeout_start + timeout:
        return

    residence_link = get_link_of_resident_block(
        x_request_id=x_request_id,
        div_number_row=div_number_row,
        div_number_column=div_number_column,
        driver=driver,
    )

    quinto_andar_data = QuintoAndarSchema()

    if residence_link:
        # Remember the listing window so it can be returned to later.
        origin_window = save_window_opener(x_request_id=x_request_id,
                                           driver=driver)
        open_new_tab(x_request_id=x_request_id, link=residence_link)
        event_switch_right_window(x_request_id=x_request_id, driver=driver)
        event_switch_to_tab_window(main_window=origin_window, driver=driver)
        sleep(8)

        send_log(
            x_request_id=x_request_id,
            message="Initiation of collection of data...",
        )
        collected_data = get_resident_block_data(
            x_request_id=x_request_id,
            quinto_andar_data=quinto_andar_data,
            driver=driver,
        )
        creation_residence_data(x_request_id=x_request_id,
                                residence_data=collected_data)

        close_current_tab(driver=driver, main_window=origin_window)
        send_log(
            x_request_id=x_request_id,
            message="Return to main screen...",
        )
        sleep(1)
        driver.switch_to_window(origin_window)

    next_row, next_column = recursive_column_row_logic(
        x_request_id=x_request_id,
        div_number_column=div_number_column,
        div_number_row=div_number_row,
        limit_scraper=limit_scraper,
        driver=driver,
    )

    send_log(
        x_request_id=x_request_id,
        message=f"Data of residence is: {quinto_andar_data}",
    )

    recursive_scraper_logic(
        x_request_id=x_request_id,
        div_number_row=next_row,
        div_number_column=next_column,
        limit_scraper=limit_scraper + 1,
        timeout_start=timeout_start,
        driver=driver,
    )
Пример #14
0
def test_sleep_function_should_return_none():
    """Check that the sleep helper returns None for a one-second wait."""
    outcome = sleep(number=1)
    assert outcome is None