示例#1
0
def number_of_rooms(x_request_id: str, driver) -> int:
    """
    Read the "number of rooms" element of the page and return its first number.

    Parameters:
        x_request_id: unique id forwarded to the log and error helpers
        driver: google chrome instance used for the XPath lookup

    Returns:
        The first run of digits found in the element text as an int, or 0
        when the text contains no digit. Nothing is returned explicitly
        (None) when the element is falsy or when a lookup error was handed
        to error_handler.
    """
    send_log(x_request_id=x_request_id,
             message="Searching for number of rooms...")
    sleep(number=2)
    try:
        rooms_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[3]/div/div[2]/div/div"
        )
        if not rooms_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found information about number of rooms...",
        )
        rooms_text = rooms_element.text

        # Without any digit there is nothing to parse: report 0.
        if not verification_string_has_digit(x_request_id=x_request_id,
                                             text=rooms_text):
            return 0

        first_number = re.findall(r"\d+", rooms_text)[0]
        return int(first_number)
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
示例#2
0
def scraper_flow(x_request_id: str, driver: any):
    """
    Drive one complete run of the QuintoAndar scraper.

    The start time is recorded and logged, recursive_scraper_logic is
    started with the initial row / column / limit values taken from the
    quinto_andar settings, and completion is logged after a final pause.

    Parameters:
        x_request_id: Unique id.
        driver: Google Chrome instance

    Returns:
        None. WebDriverException and ElementNotInteractableException are
        handed to error_handler instead of propagating.
    """
    try:
        started_at = time.time()
        send_log(
            x_request_id=x_request_id,
            message=f"Initiating the flow of scraper. Time: {started_at}",
        )

        # Starting position and limit come from the scraper settings.
        initial_position = {
            "div_number_row": quinto_andar["div_number_row_initiator"],
            "div_number_column": quinto_andar["div_number_column_initiator"],
            "limit_scraper": quinto_andar["limit_scraper"],
        }
        recursive_scraper_logic(
            x_request_id=x_request_id,
            timeout_start=started_at,
            driver=driver,
            **initial_position,
        )

        sleep(10)
        send_log(x_request_id=x_request_id,
                 message="Finished the flow of scraper.")
    except (WebDriverException, ElementNotInteractableException) as exception:
        error_handler(
            x_request_id=x_request_id,
            _msg="Exception occurred on scraper_flow",
            exception=exception,
        )
def get_type_residence(x_request_id: str, driver) -> str:
    """
    Classify the residence as house or apartment from the page heading.

    Parameters:
        x_request_id: unique id forwarded to the log and error helpers
        driver: google chrome instance used for the XPath lookup

    Returns:
        "house" when the lower-cased heading text contains "casa",
        otherwise "apartment". Nothing is returned explicitly (None) when
        the heading element is falsy or when a lookup error was handed to
        error_handler.
    """
    send_log(
        x_request_id=x_request_id,
        message="Searching for the type of residence...",
    )
    sleep(number=2)
    try:
        heading = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[2]/div[1]/h1"
        )
        if not heading:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found the type of residence...",
        )
        heading_text = heading.text.lower()
        return "house" if "casa" in heading_text else "apartment"
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
示例#4
0
def executor(x_request_id: str, consumer: str, properties: any,
             driver: any) -> any:
    """
    Dispatch to the consumer registered under `consumer` in
        consumers_object, falling back to the "default" entry.

    Parameters:
        x_request_id: str
        consumer: str
        properties: any
        driver: any

    Returns:
        Whatever the selected consumer callable returns.
    """
    # Pick the entry to run and the matching log line, then call it once.
    if consumer in consumers_object:
        consumer_key = consumer
        log_message = f"Going to execute consumer {consumer}... With follow properties {properties}"
    else:
        consumer_key = "default"
        log_message = "Consumer object not found going to execute default..."

    send_log(x_request_id=x_request_id, message=log_message)

    handler = consumers_object.get(consumer_key)
    return handler(x_request_id=x_request_id,
                   properties=properties,
                   driver=driver)
def get_link_of_resident_block(x_request_id, div_number_row: int,
                               div_number_column: int, driver) -> object:
    """
    Locate the anchor element of one residence block on the QuintoAndar
        homepage.

    Parameters:
        x_request_id: UniqueId
        div_number_row: Number of the block in row in the page
        div_number_column: Number of the block in column in page
        driver: Google Chrome instance

    Returns:
        The element found at the row/column XPath (not a plain string),
        or None when the element is falsy or when the lookup error was
        handed to error_handler.
    """
    send_log(
        x_request_id=x_request_id,
        message=f"Getting link of a respective residence base on row "
        f"{div_number_row} and column {div_number_column}...",
    )
    try:
        # Row and column select one block inside the homepage grid.
        link = driver.find_element_by_xpath(
            "/html/body/div[1]/main"
            "/section[2]/div[2]/div"
            f"/div[1]/div[{div_number_row}]/div[{div_number_column}]/div/a")
        return link or None
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(
            x_request_id=x_request_id,
            _msg="Exception occurred get_link_of_resident_block",
            exception=exception,
        )
def get_furniture_flag(x_request_id: str, driver) -> bool:
    """
    Tell whether the residence is advertised with furniture.

    Parameters:
            x_request_id: unique id forwarded to the log and error helpers
            driver: google chrome instance used for the XPath lookup

    Returns:
        False when the lower-cased element text contains "sem", True
        otherwise. Nothing is returned explicitly (None) when the element
        is falsy or when a lookup error was handed to error_handler.
    """
    send_log(x_request_id=x_request_id,
             message="Searching for a furniture flag...")
    sleep(number=2)
    try:
        furniture_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[3]/div/div[7]/div/div/span"
        )
        if not furniture_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found information about furniture in the residence...",
        )
        furniture_text = furniture_element.text.lower()
        return "sem" not in furniture_text
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
示例#7
0
def create_residence_features(
    x_request_id: str,
    residence_id: int,
    residence_feature_key: str,
    residence_feature_value: any,
) -> None:
    """
    Store one key/value feature row for a residence.

    Parameters:
        x_request_id: str
        residence_id: int
        residence_feature_key: str
        residence_feature_value: any
            stored as its str() representation

    Returns:
        None
    """
    tables = TableNameSchema()

    feature_row = dict(
        ResidenceId=residence_id,
        key=residence_feature_key,
        value=str(residence_feature_value),
    )
    created_feature = create(
        x_request_id=x_request_id,
        data=feature_row,
        table_name=tables.residence_features,
    )

    send_log(
        x_request_id=x_request_id,
        message=f"Inserted in database the follow residence values {created_feature}...",
    )
def resident_localization_data(x_request_id: str, driver) -> list:
    """
    Read the address line of the residence and split it on commas.

    Parameters:
        x_request_id: unique id forwarded to the log and error helpers
        driver: google chrome instance used for the XPath lookup

    Returns:
        The list produced by splitting the address text on ",", or None
        when that text is empty. Nothing is returned explicitly (None)
        when the element is falsy or when a lookup error was handed to
        error_handler.
    """
    send_log(
        x_request_id=x_request_id,
        message="Searching for address of residence...",
    )
    sleep(number=7)
    try:
        address_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[2]/div[2]/p"
        )
        if not address_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found information about address...",
        )
        address_text = address_element.text
        if not address_text:
            return None
        return address_text.split(",")
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
示例#9
0
def get_metro_flag(x_request_id: str, driver) -> bool:
    """
    Tell whether the page reports a metro station close to the residence.

    Parameters:
        x_request_id: unique id forwarded to the log and error helpers
        driver: google chrome instance used for the XPath lookup

    Returns:
        False when the element text contains "Não" (or "Nao"), True
        otherwise. Nothing is returned explicitly (None) when the element
        is falsy or when a lookup error was handed to error_handler.
    """
    send_log(x_request_id=x_request_id, message="Searching for subway flag...")
    sleep(number=2)
    try:
        metro_flag_data = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[3]/div/div[8]/div/div/span"
        )
        if not metro_flag_data:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found information about subway...",
        )
        metro_flag_text = metro_flag_data.text

        # str.find() returns -1 (truthy) when the word is absent and 0
        # (falsy) only when it starts the text, so bool(find(...)) gave
        # True for a negation anywhere but position 0. Membership tests
        # express the intent directly and match pet_flag's handling.
        return "Não" not in metro_flag_text and "Nao" not in metro_flag_text
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
示例#10
0
def get_residence_id(x_request_id: str, driver: any) -> int:
    """
    Extract the numeric residence id from the fifth entry of the page's
        navigation list.

    Parameters:
            x_request_id: unique id forwarded to the log and error helpers
            driver: google chrome instance used for the XPath lookup

    Returns:
        The first run of digits found in the element text as an int, or 0
        when the text contains no digit. Nothing is returned explicitly
        (None) when the element is falsy or when a lookup error was handed
        to error_handler.
    """
    send_log(x_request_id=x_request_id,
             message="Searching for the residence id...")
    sleep(number=2)
    try:
        id_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/nav/ol/li[5]/a")
        if not id_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found id of residence...",
        )
        id_text = id_element.text

        # Without any digit there is nothing to parse: report 0.
        if not verification_string_has_digit(x_request_id=x_request_id,
                                             text=id_text):
            return 0

        first_number = re.findall(r"\d+", id_text)[0]
        return int(first_number)
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
示例#11
0
def residence_size(x_request_id: str, driver) -> int:
    """
    Read the size element of the page and return its first number.

    Parameters:
        x_request_id: unique id forwarded to the log and error helpers
        driver: google chrome instance used for the XPath lookup

    Returns:
        int: the first run of digits found in the element text, or 0 when
        the text contains no digit. Nothing is returned explicitly (None)
        when the element is falsy or when a lookup error was handed to
        error_handler.
    """
    # The log lines previously said "bedrooms" (copied from another
    # helper); they now describe what this function actually looks up.
    send_log(
        x_request_id=x_request_id,
        message="Searching for the size of the residence...",
    )
    sleep(number=2)
    try:
        size_residence_data = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[3]/div/div[1]/div/div/span"
        )
        if not size_residence_data:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found information about the size of the residence...",
        )
        size_residence = size_residence_data.text

        # Without any digit there is nothing to parse: report 0.
        if not verification_string_has_digit(x_request_id=x_request_id,
                                             text=size_residence):
            return 0
        return int(re.findall(r"\d+", size_residence)[0])
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
def creation_residence_data(
    x_request_id: str, residence_data: QuintoAndarSchema
) -> None:
    """
    Persist everything collected for one residence.

    The address is created first, then the residence that references it,
    then the rent values, and finally the pet / metro / furniture flags
    are passed to deal_with_feature.

    Parameters:
        x_request_id: str
        residence_data: QuintoAndarSchema

    Returns:
        None on success. When SyntaxError, AttributeError or
        AssertionError is raised, the result of error_handler is returned.

    Notes:
        deal_with_feature receives the feature mapping together with the
        residence id; presumably it stores one feature row per entry
        (defined elsewhere - confirm there).
    """
    try:
        send_log(
            message=f"Going to create the follow data {residence_data}",
            x_request_id=x_request_id,
        )

        residence_address_id = create_residence_address(
            x_request_id=x_request_id, residence_data=residence_data
        )
        residence_id = create_residence(
            x_request_id=x_request_id,
            residence_address_id=residence_address_id,
            residence_data=residence_data,
        )
        create_residence_values(
            x_request_id=x_request_id,
            residence_id=residence_id,
            residence_data=residence_data,
        )
        features = {
            "petFlag": residence_data.pet_flag,
            "metroFlag": residence_data.metro_flag,
            "furnitureFlag": residence_data.furniture_flag,
        }

        deal_with_feature(
            features=features,
            x_request_id=x_request_id,
            residence_id=residence_id,
        )
    except (SyntaxError, AttributeError, AssertionError) as exception:
        # Forward the request id so the error can be correlated with the
        # rest of this request's log lines.
        return error_handler(
            x_request_id=x_request_id,
            exception=exception,
            _msg="Exception occurred in create_residence_flow",
        )
示例#13
0
def save_window_opener(x_request_id: str, driver: any) -> any:
    """
    Log the action and hand back the handle of the window the driver is
        currently focused on.

    Parameters:
        x_request_id: str
        driver: Google chrome instance

    Returns:
        The value of driver.current_window_handle
    """
    send_log(x_request_id=x_request_id, message="Saving main screen...")
    main_window_handle = driver.current_window_handle
    return main_window_handle
示例#14
0
def event_switch_right_window(x_request_id: str, driver) -> None:
    """
    Send the CONTROL+TAB key combination to the page body to move to the
        next tab.

    Parameters:
        x_request_id: str
        driver: Google Chrome instance

    Returns:
        None
    """
    send_log(x_request_id=x_request_id, message="Changing to the next tab...")
    page_body = driver.find_element_by_tag_name("body")
    next_tab_keys = Keys.CONTROL + Keys.TAB
    page_body.send_keys(next_tab_keys)
示例#15
0
def finish_session(x_request_id: str, driver) -> None:
    """
    Log the shutdown and quit the driver.

    Parameters:
        x_request_id: Unique id.
        driver: Google Chrome instance.

    Returns:
        None
    """
    send_log(
        x_request_id=x_request_id,
        message="Finishing session of scraper...",
    )
    driver.quit()
示例#16
0
def get_rent_values(x_request_id: str, driver) -> dict:
    """
    Read the rent breakdown list of the page and map it to named values.

    Parameters:
        x_request_id: unique id forwarded to the log and error helpers
        driver: google chrome instance used for the XPath lookup

    Returns:
        dict with the keys rent_without_taxes, condominium_tax, house_tax,
        fire_insurance, service_tax and total_rent_value. When exactly six
        numbers are found, the values are those numbers as strings, in
        page order; otherwise every value is "0". Nothing is returned
        explicitly (None) when the element is falsy, its text is empty,
        or a lookup error was handed to error_handler.
    """
    send_log(
        x_request_id=x_request_id,
        message="Searching for a values of the rent...",
    )
    try:
        sleep(number=2)
        rent_values_data = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[2]/section/div/ul")
        if not rent_values_data:
            return None

        send_log(x_request_id=x_request_id, message="Found the values...")

        rent_text = rent_values_data.text
        if not rent_text:
            return None

        # Keys listed in the order the numbers are mapped onto them.
        field_names = (
            "rent_without_taxes",
            "condominium_tax",
            "house_tax",
            "fire_insurance",
            "service_tax",
            "total_rent_value",
        )
        # Default to "0" (same form as a parsed amount) instead of the
        # `int` type object, which leaked out as a value whenever the
        # page did not yield exactly six numbers.
        rent_values_dict = dict.fromkeys(field_names, "0")

        # "Incluso" marks an included fee; count it as a zero amount.
        amounts = re.findall(r"(?<![.,])\d+[,.]{0,1}\d*",
                             rent_text.replace("Incluso", "0"))

        # Only trust the positional mapping when all six numbers exist.
        if len(amounts) == len(field_names):
            rent_values_dict.update(zip(field_names, amounts))

        return rent_values_dict
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
def validate_message_data(x_request_id: str, message: any) -> dict:
    """
    Decode a JSON message and accept it only when it carries events.

    Parameters:
        x_request_id: id unique
        message: JSON text to decode

    Returns:
        The decoded message when its "events" entry is truthy, else None
    """
    send_log(x_request_id=x_request_id, message="Validating message...")
    decoded = json.loads(message)

    if decoded.get("events"):
        return decoded
    return None
示例#18
0
def verification_string_has_digit(x_request_id: str, text: str) -> bool:
    """
    Report whether at least one character of the text is a digit.

    Parameters:
            x_request_id: unique id forwarded to the log helper
            text: sentence to inspect; None is treated as "no digit"

    Returns:
        bool: True when any character satisfies str.isdigit, else False
    """
    send_log(x_request_id=x_request_id,
             message="Verification if text has digit...")
    if text is None:
        return False

    # Stop at the first digit found.
    for character in text:
        if str.isdigit(character):
            return True
    return False
示例#19
0
def create_residence_values(
    x_request_id: str, residence_id: int, residence_data: QuintoAndarSchema
) -> None:
    """
    Store the rent values of a residence.

    Every monetary field of residence_data is converted with float()
    before the row is created.

    Parameters:
        x_request_id: str
        residence_id: int
        residence_data: QuintoAndarSchema

    Returns:
        None on success. When one of the handled exceptions is raised,
        the result of error_handler is returned.
    """
    try:
        table_name = TableNameSchema()
        data = {
            "ResidenceId": residence_id,
            "price": float(residence_data.rent_price_without_tax),
            "condominiumTax": float(residence_data.condominium_tax),
            "houseTax": float(residence_data.house_tax),
            "fireInsurence": float(residence_data.fire_insurance),
            "serviceTax": float(residence_data.service_tax),
            "totalRentPrice": float(residence_data.total_rent_price),
        }

        residence_values = create(
            x_request_id=x_request_id,
            data=data,
            table_name=table_name.residence_values,
        )
        send_log(
            x_request_id=x_request_id,
            message=f"Inserted in database the follow residence values {residence_values}...",
        )
    except (
        TimeoutError,
        SyntaxError,
        IndexError,
        AttributeError,
        # float() raises ValueError for unparsable text and TypeError for
        # unsupported objects; route those through error_handler as well
        # instead of letting them escape this function.
        TypeError,
        ValueError,
    ) as exception:
        return error_handler(
            x_request_id=x_request_id,
            exception=exception,
            _msg="Exception occurred in create_residence_value",
        )
示例#20
0
def open_new_tab(x_request_id: str, link) -> None:
    """
    Send CONTROL+RETURN to the given link element so that it opens in a
        new tab.

    Parameters:
        x_request_id: str
        link: element that receives the key combination

    Returns:
        None
    """
    send_log(x_request_id=x_request_id,
             message="Opening new tab for the link the was get...")
    open_in_new_tab_keys = Keys.CONTROL + Keys.RETURN
    link.send_keys(open_in_new_tab_keys)
示例#21
0
def send_message(
    x_request_id: str,
    queue,
    message_body,
    message_attributes=None,
    thread_number: int = 0,
) -> None:
    """
    Publish one message to an Amazon SQS queue and log the decoded body.

    Parameters:
        x_request_id: unique id
        queue: The queue that receives the message.
        message_body: JSON text to send; it is decoded after sending so
            that the log shows the parsed content.
        message_attributes: any
            a falsy value is replaced by an empty dict
        thread_number: int
            used as suffix of the message group id

    Returns:
        None. ClientError and TypeError are handed to error_handler.
    """
    attributes = message_attributes if message_attributes else {}
    try:
        send_arguments = {
            "MessageBody": message_body,
            "MessageAttributes": attributes,
            "MessageDeduplicationId": f"wmh_scraper_{random_number(10000)}",
            "MessageGroupId": f"wmh_scraper_{thread_number}",
        }
        queue.send_message(**send_arguments)

        # Rebinding keeps the error message below in sync with whatever
        # form the body has at the moment of failure.
        message_body = json.loads(message_body)

        send_log(
            message=f"Sending the follow msg to SQS QUEUE {message_body}",
            x_request_id=x_request_id,
        )
    except (ClientError, TypeError) as exception:
        error_handler(
            x_request_id=x_request_id,
            _msg=f"Send message failed: {message_body}",
            exception=exception,
        )
def open_page(x_request_id: str, driver, link: str) -> None:
    """Navigate the driver to the given link, when one is provided.

    The log line is written in every case, including when the link is
    falsy and no navigation happened.

    Parameters:
        x_request_id: Unique id.
        driver: Google Chrome instance.
        link: URL of WebPage.

    Returns:
        None
    """
    if link:
        driver.get(link)

    send_log(x_request_id=x_request_id,
             message=f"Page was open with follow link {link}...")
示例#23
0
def delete_message(x_request_id: str, message) -> None:
    """
    Remove a message from its queue and log the success.

    Parameters:
        x_request_id: Unique id str
        message: The message to delete; its own delete() method is called.

    Returns:
        None. ClientError and AttributeError are handed to error_handler.
    """
    try:
        message.delete()
        send_log(x_request_id=x_request_id,
                 message="Message have been deleted with success.")
    except (ClientError, AttributeError) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
def pet_flag(x_request_id: str, driver) -> bool:
    """
    Tell whether the page reports that pets are allowed in the residence.

    Parameters:
        x_request_id: unique id forwarded to the log and error helpers
        driver: google chrome instance used for the XPath lookup

    Returns:
        False when the element text contains "Não" or "Nao", True
        otherwise. Nothing is returned explicitly (None) when the element
        is falsy or when a lookup error was handed to error_handler.
    """
    send_log(x_request_id=x_request_id, message="Searching for pet flag...")
    sleep(number=2)
    try:
        pet_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[3]/div/div[6]/div/div/span"
        )
        if not pet_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found information about pet flag...",
        )
        pet_text = pet_element.text
        send_log(
            x_request_id=x_request_id,
            message=f"Pet flag informatio is {pet_text}",
        )

        # Pets are allowed only when neither spelling of "no" appears.
        return "Não" not in pet_text and "Nao" not in pet_text
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
示例#25
0
def api_integration(x_request_id: str, url: str, token: str,
                    body: dict) -> "requests.Response":
    """
    POST a JSON body to the given URL and return the response.

    Parameters:
        x_request_id: str
        url: str
        token: str
            sent unchanged as the Authorization header
        body: dict
            serialised with json.dumps before sending

    Returns:
        The response object produced by requests.post. When the request
        times out (25 second limit), the result of error_handler is
        returned instead.
    """
    headers = {"Content-Type": "application/json", "Authorization": token}
    try:
        send_log(
            x_request_id=x_request_id,
            message=
            f"Sending request to follow path: {url} with follow data: {body}",
        )
        data = requests.post(url=url,
                             data=json.dumps(body),
                             headers=headers,
                             timeout=25)
        send_log(
            x_request_id=x_request_id,
            message=f"Request finish with status: {data.status_code}",
        )
        return data
    # ReadTimeout is a subclass of Timeout, so catching Timeout alone
    # covers both connect and read timeouts.
    except requests.exceptions.Timeout as exception:
        return error_handler(
            x_request_id=x_request_id,
            _msg="Exception occurred in api service.",
            exception=exception,
        )
示例#26
0
def main(driver: any, queue: any) -> None:
    """
    Poll the SQS queue forever and run the scraper flow for each message.

    When a poll returns no message, the loop logs that fact, sleeps and
    then calls dealing_with_empty_queue. Every received message is handed
    to consumer_message_handler and deleted afterwards.

    Parameters:
        driver: any
        queue: any

    Returns:
        None. An AttributeError raised anywhere in the loop ends it and
        is handed to error_handler.
    """
    try:
        while True:
            received = receive_messages(queue=queue, max_number=1, wait_time=0)

            if len(received) == 0:
                send_log(
                    x_request_id="",
                    message="QUEUE with 0 messages, going to send default event in 30 minutes...",
                )
                sleep(number=1800)
                dealing_with_empty_queue(queue=queue)
                continue

            for queue_message in received:
                request_id = request_handler(message=queue_message.body)
                send_log(
                    x_request_id=request_id,
                    message="Receive message going to start scraper flow...",
                )
                consumer_message_handler(
                    message=queue_message.body,
                    x_request_id=request_id,
                    driver=driver,
                )
                delete_message(x_request_id=request_id, message=queue_message)
    except AttributeError as exception:
        error_handler(exception=exception)
def recursive_scraper_logic(
    x_request_id: str,
    div_number_row: int,
    div_number_column: int,
    limit_scraper: int,
    timeout_start,
    driver,
):
    """
    Scrape one residence block of the listing page, then call itself for
        the next block until the time budget is used up.

    Parameters:
        x_request_id: Unique id.
        div_number_row: Number of the block in row in the page
        div_number_column: Number of the block in column in page
        limit_scraper: Counter passed to recursive_column_row_logic and
            incremented by one on every call of this function
        timeout_start: time.time() value taken when the scraper began
        driver: Google Chrome instance

    Notes:
        Each call sleeps first and only then checks the time budget of
            900 seconds counted from timeout_start. Once the budget is
            exceeded the call returns without scraping or recursing.
        The next row / column to visit is decided by
            recursive_column_row_logic (defined elsewhere).

    Returns:
        None
    """
    # Time budget in seconds, measured from timeout_start.
    timeout = 900

    # Pause before every block, including the final call that only
    # discovers the budget is exhausted.
    sleep(15)

    # Keep scraping only while still inside the 15 minute window.
    if time.time() < timeout_start + timeout:
        link = get_link_of_resident_block(
            x_request_id=x_request_id,
            div_number_row=div_number_row,
            div_number_column=div_number_column,
            driver=driver,
        )

        # Fresh container for the data of this block.
        quinto_andar_data = QuintoAndarSchema()

        # A falsy link (nothing found at this position) skips straight
        # to the position update below.
        if link:
            # Remember the listing window so it can be restored later.
            main_window = save_window_opener(x_request_id=x_request_id,
                                             driver=driver)
            open_new_tab(x_request_id=x_request_id, link=link)
            event_switch_right_window(x_request_id=x_request_id, driver=driver)
            # presumably moves the driver focus to the tab that is not
            # main_window - defined elsewhere, confirm there
            event_switch_to_tab_window(main_window=main_window, driver=driver)
            sleep(8)
            send_log(
                x_request_id=x_request_id,
                message="Initiation of collection of data...",
            )
            resident_data = get_resident_block_data(
                x_request_id=x_request_id,
                quinto_andar_data=quinto_andar_data,
                driver=driver,
            )
            # Persist what was collected before leaving the detail tab.
            creation_residence_data(x_request_id=x_request_id,
                                    residence_data=resident_data)
            close_current_tab(driver=driver, main_window=main_window)
            send_log(x_request_id=x_request_id,
                     message="Return to main screen...")
            sleep(1)
            driver.switch_to_window(main_window)

        # Work out which row / column to visit on the next call.
        div_number_row, div_number_column = recursive_column_row_logic(
            x_request_id=x_request_id,
            div_number_column=div_number_column,
            div_number_row=div_number_row,
            limit_scraper=limit_scraper,
            driver=driver,
        )

        # NOTE(review): this logs quinto_andar_data, not resident_data;
        # whether get_resident_block_data fills it in place is not
        # visible here - confirm.
        send_log(
            x_request_id=x_request_id,
            message=f"Data of residence is: {quinto_andar_data}",
        )
        limit_scraper += 1

        # Continue with the next block; timeout_start is passed through
        # unchanged so the budget covers the whole run.
        recursive_scraper_logic(
            x_request_id=x_request_id,
            div_number_row=div_number_row,
            div_number_column=div_number_column,
            limit_scraper=limit_scraper,
            timeout_start=timeout_start,
            driver=driver,
        )