Пример #1
0
def send_emails(emails_list):
    """Send emails based on a list of tuples in the form (address, subject, body, *attachments).

    Nothing is sent when 'config.EMAILS_ENABLED' is falsy. Otherwise a single SMTP
    session is opened for 'config.SENDER_EMAIL' and each email is attempted in turn.
    A failure that is not a login failure is logged, the email is handed to
    'save_unsent_email()' and the loop moves on to the next email.

    Args:
        emails_list (list): A list of tuples, with each tuple containing the following data:
            - address (str): the destination email address.
            - subject (str): the subject line of the email.
            - body (str): the main body content of the email.
            - *attachments (str, optional): 0 or more attachment file locations.

    Returns:
        None.

    """
    # Emails can be switched off globally; in that case there is nothing to do
    if not config.EMAILS_ENABLED:
        return None

    # Provide sender email password as second argument if not saving via 'set_email_login()'
    with yagmail.SMTP(config.SENDER_EMAIL) as server:
        print("\nSending {} email(s)...".format(len(emails_list)))
        # Once set to False, the 'while' loop below is skipped for every remaining email
        valid_login = True
        for address, subject, body, *attachments in emails_list:
            # Re-attempt send if login failed and password updated via 'set_email_login()'
            while valid_login:
                try:
                    server.send(to=address, subject=subject, contents=body, attachments=attachments)
                    print("Email sent to {} (subject: {}).".format(address, subject))
                    # Short pause after each successful send
                    time.sleep(0.5)
                    # Break out of 'while valid_login' loop to move to next email
                    break

                except smtplib.SMTPAuthenticationError as e:
                    short_err_msg = "Email login failed. Please ensure that less secure app " \
                                    "access is 'On' for the sender email Google account and " \
                                    "that your saved login details are correct."

                    print(short_err_msg)
                    # Provide option of updating password and re-attempting send
                    # NOTE(review): the source text is corrupted from the next statement onwards.
                    # The 'input()' call appears to be fused with a later error-message expression
                    # ('******' marks the lost text), so the handling of the user's answer is not
                    # visible here. Restore the missing statements from version control; do not
                    # rely on the lines below as written.
                    answer = input("Type 'y' and press Enter to update your saved password "
                                   "and re-attempt the login: "******"These can be updated using "
                                                   "set_email_login().", trace=False)

                        logs.log_error(err_msg)
                        # Skip send attempts for any subsequent emails
                        valid_login = False

                # Prevent all emails failing; log error and save unsent email
                except Exception as e:
                    err_msg = logs.get_err_str(e, "Error sending email to {}.".format(address))
                    logs.log_error(err_msg)
                    # Attachments are not passed on to 'save_unsent_email()'
                    save_unsent_email(address, subject, body)
                    # Break out of while loop to move to next email
                    break
Пример #2
0
    def download_robotstxt(self, max_attempts=5, wait=120):
        """Fetch the site's robots.txt file and return its text content (str).

        Timeouts and connection errors are retried, pausing 'wait' seconds between
        attempts. When the final attempt fails, a description is stored in
        'self.err_message' and the exception is re-raised.

        Args:
            max_attempts (int): the maximum number of robots.txt URL connection attempts.
            wait (int): the number of seconds between connection attempts.

        Raises:
            requests.exceptions.Timeout: the final attempt timed out.
            requests.exceptions.ConnectionError: the final attempt could not connect.
            requests.exceptions.HTTPError: the response status code was not 200.

        """
        robots_url = self.url + "robots.txt"
        final_index = max_attempts - 1

        for attempt in range(max_attempts):
            can_retry = attempt < final_index
            retry_note = " Trying again in {} seconds. Attempt {} of {}.".format(
                wait, attempt + 1, max_attempts)
            request_headers = {'User-Agent': config.USER_AGENT}

            try:
                response = requests.get(robots_url,
                                        headers=request_headers,
                                        allow_redirects=False,
                                        timeout=40)

            except requests.exceptions.Timeout as exc:
                problem = "{} timed out before sending a valid response.".format(robots_url)
                if not can_retry:
                    # Out of attempts: record the reason (without a traceback) and give up
                    self.err_message = logs.get_err_str(exc, problem, trace=False)
                    raise
                print(problem + retry_note)
                time.sleep(wait)
                continue

            except requests.exceptions.ConnectionError as exc:
                problem = "There was a connection error when accessing {}.".format(robots_url)
                if not can_retry:
                    # Out of attempts: record the reason and add it to the admin error list
                    self.err_message = logs.get_err_str(exc, problem)
                    logs.admin_email_errors.append(self.err_message)
                    raise
                print(problem + retry_note)
                time.sleep(wait)
                continue

            # A response was received; only a 200 counts as success
            if req_ok := (response.status_code == 200):
                return response.text

            self.err_message = "{} returned a {} status code.".format(
                robots_url, response.status_code)
            raise requests.exceptions.HTTPError
Пример #3
0
def main():
    """Run every site check, reporting any fatal error, then send the admin emails.

    A fatal error while reading the sites CSV or running the checks is logged and
    queued as an admin email. Whatever happens, queued admin emails are sent (when
    emails are enabled) and an end-of-run marker is written to the main log.
    """
    try:
        monitored = sites_from_file(config.MONITORED_SITES)
        RunChecks(monitored).check_all()

    except Exception as fatal_err:
        # Fatal error during CSV read or RunChecks
        logged_text = logs.get_err_str(fatal_err, "Fatal error.")
        logs.log_error(logged_text)

        subject = "Robots.txt Check Fatal Error"
        message = ("There was a fatal error during the latest robots.txt checks which "
                   "caused the program to terminate unexpectedly.")

        body = emails.get_admin_email_body(message)
        admin_entry = (config.ADMIN_EMAIL, subject, body)
        emails.admin_email.append(admin_entry)

    finally:
        if not config.EMAILS_ENABLED:
            disabled_note = (
                "Note: emails are disabled. Details of the program run have been printed "
                "and/or logged. Set 'EMAILS_ENABLED' to equal 'True' to send/receive emails."
            )
            print(disabled_note)
        else:
            emails.send_emails(emails.admin_email)

        # Visually separate this run from the next one in the main log
        rule = "-" * 20
        logs.update_main_log("\n{}END OF RUN{}\n".format(rule, rule), timestamp=False)
Пример #4
0
    def run_check(self):
        """Download the robots.txt file, update the stored records and look for changes.

        Nothing is attempted if an error was already recorded before this call.
        Exceptions raised during the check are not propagated; they are recorded
        in 'self.err_message' instead.

        Returns:
            The class instance representing the completed robots.txt check.
        """
        # An error recorded earlier (e.g. an invalid URL during __init__) ends the check
        if self.err_message:
            return self

        try:
            latest_text = self.download_robotstxt()
            self.update_records(latest_text)
            # There is nothing to compare against on the first run
            if not self.first_run:
                self.check_diff()

        except Exception as exc:
            # Anticipated errors are caught in 'download_robotstxt()' and already
            # stored in 'self.err_message'; only unexpected ones are handled here
            if not self.err_message:
                context = "Unexpected error during {} check.".format(self.url)
                self.err_message = logs.get_err_str(exc, context)
                logs.admin_email_errors.append(self.err_message)

        return self
Пример #5
0
    def __init__(self, url):
        """Initialise the state for one site's robots.txt check.

        Problems found here (an invalid URL, or a failure to create the site's
        directories) are recorded in 'self.err_message' rather than raised.

        Args:
            url (str): the absolute URL of the site homepage, with a trailing
                slash, e.g. 'https://www.example.com/'.

        """
        self.url = url
        # Updated where appropriate as the check progresses
        self.first_run = False
        self.err_message = None
        self.file_change = False
        # Use site domain name as directory name: drop the scheme prefix
        # ('https://' is 8 characters, 'http://' is 7) and the trailing slash
        if self.url.startswith('https'):
            self.dir = config.PATH + "data/" + self.url[8:-1]
        else:
            self.dir = config.PATH + "data/" + self.url[7:-1]
        program_files_dir = self.dir + "/program_files"
        self.old_file = program_files_dir + "/old_file.txt"
        self.new_file = program_files_dir + "/new_file.txt"
        # Content assigned during 'update_records()' after a successful check
        self.old_content = None
        self.new_content = None

        # startswith/endswith instead of indexing, so that an empty URL is reported
        # as invalid rather than raising IndexError
        if not (self.url.startswith("http") and self.url.endswith("/")):
            self.err_message = "{} is not a valid site URL. The site URL must be absolute and " \
                               "end in a slash, e.g. 'https://www.example.com/'.".format(url)

        # If URL is valid, create whichever of the required directories is missing.
        # Checking the sub-directory (not just the site directory) recovers from an
        # earlier run that created the site directory but not 'program_files'.
        elif not os.path.isdir(program_files_dir):
            try:
                if not os.path.isdir(self.dir):
                    os.mkdir(self.dir)
                os.mkdir(program_files_dir)

            except Exception as e:
                self.err_message = logs.get_err_str(
                    e, "Error creating {} directories."
                    "".format(self.url))
                logs.admin_email_errors.append(self.err_message)
Пример #6
0
def sites_from_file(file):
    """Extract monitored sites data from a CSV and return as a list of lists.

    The first row is treated as a header and skipped. Only the first three columns
    of each remaining row are kept. A row with fewer than three columns is logged
    as an error and left out of the result.

    Args:
        file (str): file location of a CSV file with the following attributes:
            - Header row labelling the three columns, as listed below.
            - url (col1): the absolute URL of the website homepage, with a trailing slash.
            - name (col2): the website's name identifier (letters/numbers only).
            - email (col3): the email address of the site admin, who will receive alerts.

    Returns:
        list: one [url, name, email] list per valid data row, in file order.

    """
    data = []
    # newline='' leaves line-ending handling to the csv module, as its documentation
    # requires; otherwise line endings inside quoted fields can be altered
    with open(file, 'r', newline='') as sites_file:
        csv_reader = csv.reader(sites_file, delimiter=',')
        for row_num, row in enumerate(csv_reader):
            # Skip the header row labels
            if row_num == 0:
                continue

            try:
                site = [row[0], row[1], row[2]]
            except IndexError as e:
                # Row has fewer than three columns (this includes blank lines)
                err_msg = logs.get_err_str(
                    e, "Couldn't extract row {} from CSV."
                    "".format(row_num))
                logs.log_error(err_msg)
            else:
                data.append(site)

    return data
Пример #7
0
    def check_site(self, site_attributes):
        """Run a robots.txt check and report for a single site.

        The matching counter ('self.error', 'self.first_run', 'self.change' or
        'self.no_change') is incremented according to the outcome. Unexpected errors
        are logged and reported rather than raised, so that one failing site does
        not stop the remaining checks.

        Args:
            site_attributes (list): a list representing a single site's attributes
            in the form [url, name, email]. Each attribute is detailed below.
                - url (str): the absolute URL of the website homepage, with a trailing slash.
                - name (str): the website's name identifier (letters/numbers only).
                - email (str): the email address of the site admin, who will receive alerts.

        """
        try:
            url, name, email = site_attributes
            url = url.strip().lower()
            email = email.strip()

            check = RobotsCheck(url)
            check.run_check()

            if check.err_message:
                report = ErrorReport(check, name, email)
                self.error += 1
            elif check.first_run:
                report = FirstRunReport(check, name, email)
                self.first_run += 1
            elif check.file_change:
                report = ChangeReport(check, name, email)
                self.change += 1
            else:
                report = NoChangeReport(check, name, email)
                self.no_change += 1

            report.create_reports()

        # Prevent all site checks failing; log error to investigate
        except Exception as e:
            err_msg = logs.get_err_str(
                e, "Unexpected error for site: {}.".format(site_attributes))
            logs.log_error(err_msg)

            # The failure may itself have been caused by malformed 'site_attributes'
            # (e.g. too few items), so the site email must be read defensively;
            # otherwise this handler would raise and abort the remaining checks
            try:
                recipient = site_attributes[2].strip()
            except (IndexError, KeyError, TypeError, AttributeError):
                recipient = None

            if recipient is not None:
                email_subject = "Robots.txt Check Error"
                email_content = "There was an unexpected error while checking or reporting on the " \
                                "robots.txt file of a site which is associated with your email. " \
                                "If this is the first check, please ensure all site details " \
                                "were provided in the correct format. The error details are " \
                                "shown below.\n\n{}".format(err_msg)

                email_content = emails.replace_angle_brackets(email_content)
                email_body = emails.get_site_email_body(email_content)
                emails.site_emails.append(
                    (recipient, email_subject, email_body))
            else:
                # No usable site email: record the error on the shared admin error list
                logs.admin_email_errors.append(err_msg)

            self.error += 1