def setUpClass(cls):
     """Prepare the class-wide fixtures for the Dropbox cloudstore tests.

     Sets up logging, requires the ``DROPBOX_API_KEY`` environment variable,
     loads the all-arguments template configuration and deletes the remote
     tests folder (presumably so every run starts from a clean remote state).

     :raises Exception: if ``DROPBOX_API_KEY`` is not present in the environment.
     """
     cls._setup_log()
     missing_key_msg = 'DROPBOX_API_KEY env variable is not set!'
     if os.environ.get("DROPBOX_API_KEY") is None:
         logger.error(missing_key_msg)
         raise Exception(missing_key_msg)
     logger.info('Loading Configuration..')
     conf_path = os.path.join(cls.test_data_path, 'template_conf_all_args.yml')
     cls.configuration = Configuration(config_src=conf_path)
     cls.remote_tests_folder = '/job_bot_tests'
     # Remove the remote tests folder using the first configured cloudstore
     dropbox_config = cls.configuration.get_cloudstores()[0]
     JobBotDropboxCloudstore(config=dropbox_config).delete_file(cls.remote_tests_folder)
 def test_update_get_email_data(self):
     """Round-trip every email template (subject + HTML body) through the cloudstore.

     For each email type the backup subject/body files are copied to the
     type-specific file names, uploaded with the matching ``update_*``
     method, checked to be present in the remote tests folder, downloaded
     again with the matching ``get_*`` method and compared byte-for-byte
     with the files that were uploaded.
     """
     cloud_store = JobBotDropboxCloudstore(config=self.configuration.get_cloudstores()[0],
                                           remote_files_folder=self.remote_tests_folder)
     email_types = (('application_to_send', cloud_store.get_application_to_send_email_data,
                     cloud_store.update_application_to_send_email_data),
                    ('inform_should_call', cloud_store.get_inform_should_call_email_data,
                     cloud_store.update_inform_should_call_email_data),
                    ('inform_success', cloud_store.get_inform_success_email_data,
                     cloud_store.update_inform_success_email_data))
     # The backup sources are identical for every email type, so resolve them once
     bck_subject_path = os.path.join(cloud_store.local_files_folder,
                                     'bck_subject.txt')
     bck_html_path = os.path.join(cloud_store.local_files_folder,
                                  'bck_body.html')
     for email_type, get_func, update_func in email_types:
         # Copy the backups to the actual (type-specific) files
         current_subject_file = '{type}_subject.txt'.format(type=email_type)
         current_html_file = '{type}_body.html'.format(type=email_type)
         subject_path = os.path.join(cloud_store.local_files_folder,
                                     current_subject_file)
         html_path = os.path.join(cloud_store.local_files_folder,
                                  current_html_file)
         copyfile(bck_subject_path, subject_path)
         copyfile(bck_html_path, html_path)
         # Upload the email data
         logger.info('Uploading %s email data..' % email_type)
         update_func()
         # Check that both files were uploaded (a single remote listing covers both)
         remote_file_names = cloud_store.ls(self.remote_tests_folder).keys()
         self.assertIn(current_subject_file, remote_file_names)
         self.assertIn(current_html_file, remote_file_names)
         # Rename the old files before downloading them
         # NOTE(review): the rename sources are built from self.test_data_path while the
         # copies above target cloud_store.local_files_folder - this assumes both point
         # to the same directory; confirm against the test configuration.
         logger.info('Renaming the old file before downloading it..')
         copied_subject_file = os.path.join(self.test_data_path,
                                            self.file_name + '_{type}_subject.txt'.format(type=email_type))
         copied_html_file = os.path.join(self.test_data_path,
                                         self.file_name + '_{type}_body.html'.format(type=email_type))
         os.rename(os.path.join(self.test_data_path, current_subject_file), copied_subject_file)
         os.rename(os.path.join(self.test_data_path, current_html_file), copied_html_file)
         # Download it
         logger.info('Downloading {type} email data..'.format(type=email_type))
         actual_subject, actual_html = get_func()
         logger.debug("Received: %s and %s" % (actual_subject, actual_html))
         # Compare contents of the downloaded data with the original files
         with open(copied_subject_file, 'rb') as f:
             self.assertEqual(f.read(), bytes(actual_subject, encoding='utf-8'))
         with open(copied_html_file, 'rb') as f:
             self.assertEqual(f.read(), bytes(actual_html, encoding='utf-8'))
         logger.info("Clearing file: %s" % copied_subject_file)
         os.remove(copied_subject_file)
         logger.info("Clearing file: %s" % copied_html_file)
         os.remove(copied_html_file)
 def test_upload_download_attachment(self):
     """Upload the first configured attachment, download it again and compare the bytes.

     The backup attachment is copied to the real attachment name, uploaded,
     verified to exist in the remote tests folder, moved aside locally,
     downloaded again and finally compared with the moved-aside original.
     """
     cloud_store = JobBotDropboxCloudstore(config=self.configuration.get_cloudstores()[0],
                                           remote_files_folder=self.remote_tests_folder)
     # Copy bck to actual file
     attachment_name = cloud_store.attachments_names[0]
     attachment_path = os.path.join(cloud_store.local_files_folder,
                                    attachment_name)
     bck_attachment_path = os.path.join(cloud_store.local_files_folder,
                                        'bck_' + attachment_name)
     copyfile(bck_attachment_path, attachment_path)
     # Upload attachments
     logger.info('Uploading attachment..')
     cloud_store.upload_attachments()
     # Check if it was uploaded
     self.assertIn(attachment_name, cloud_store.ls(self.remote_tests_folder).keys())
     # Rename the old file before downloading it
     logger.info('Renaming the old file before downloading it..')
     original_copy_path = os.path.join(self.test_data_path, self.file_name)
     os.rename(attachment_path, original_copy_path)
     # Download it
     logger.info('Downloading attachment..')
     cloud_store.download_attachments()
     # Compare contents of downloaded file with the original.
     # Context managers make sure both file handles are closed again.
     with open(original_copy_path, 'rb') as original_file:
         original_bytes = original_file.read()
     with open(attachment_path, 'rb') as downloaded_file:
         downloaded_bytes = downloaded_file.read()
     self.assertEqual(original_bytes, downloaded_bytes)
     # Delete the attachment
     os.remove(attachment_path)
Exemplo n.º 4
0
def main():
    """
    Entry point: initialise the program and run the action selected by ``args.run_mode``.

    :Example:
    python main.py [-m crawl_and_send]
                   -c confs/template_conf.yml
                   -l logs/output.log

    :raises argparse.ArgumentTypeError: if ``args.run_mode`` is not a supported mode.
    """

    # Initializing
    args, configuration = init_main()

    # Factories: stores are only built by the action that actually needs them
    def new_data_store():
        return JobBotMySqlDatastore(config=configuration.get_datastores()[0])

    def new_cloud_store():
        return JobBotDropboxCloudstore(config=configuration.get_cloudstores()[0])

    def list_emails():
        show_ads_checked(ads=new_data_store().get_applications_sent())

    def remove_email():
        new_data_store().remove_ad(email_id=args.email_id)

    def upload_files():
        upload_files_to_cloudstore(cloud_store=new_cloud_store())

    def create_table():
        new_data_store().create_applications_sent_table()

    def crawl_and_send():
        crawl_and_send_loop(
            lookup_url=configuration.lookup_url,
            check_interval=configuration.check_interval,
            crawl_interval=configuration.crawl_interval,
            anchor_class_name=configuration.anchor_class_name,
            data_store=new_data_store(),
            cloud_store=new_cloud_store(),
            email_app=GmailEmailApp(config=configuration.get_email_apps()[0],
                                    test_mode=configuration.test_mode))

    # Start in the specified mode
    actions = {
        'list_emails': list_emails,
        'remove_email': remove_email,
        'upload_files': upload_files,
        'create_table': create_table,
        'crawl_and_send': crawl_and_send,
    }
    selected_action = actions.get(args.run_mode)
    if selected_action is None:
        logger.error('Incorrect run_mode specified!')
        raise argparse.ArgumentTypeError('Incorrect run_mode specified!')
    selected_action()
    def test_init(self):
        """Check the cloudstore flags derived from a full and a required-only configuration.

        With the all-arguments configuration the cloudstore must have at least
        one attachment name and every ``_update_*`` flag set; with the
        required-arguments-only configuration it must have no attachment
        names and every ``_update_*`` flag unset.
        """
        req_only_conf = Configuration(
            config_src=os.path.join(self.test_data_path, 'template_conf_required_args_only.yml'))

        cloud_store = JobBotDropboxCloudstore(config=self.configuration.get_cloudstores()[0],
                                              remote_files_folder=self.remote_tests_folder)
        boolean_attributes = [len(cloud_store.attachments_names) > 0,
                              cloud_store._update_stop_words,
                              cloud_store._update_application_to_send_email,
                              cloud_store._update_inform_success_email,
                              cloud_store._update_inform_should_call_email]
        # The condition must be the FIRST argument: assertTrue(True, <expr>) would always
        # pass because the second positional argument is only the failure message.
        self.assertTrue(all(boolean_attributes))
        req_only_cloud_store = JobBotDropboxCloudstore(config=req_only_conf.get_cloudstores()[0],
                                                       remote_files_folder=self.remote_tests_folder)
        req_only_boolean_attributes = [len(req_only_cloud_store.attachments_names) == 0,
                                       not req_only_cloud_store._update_stop_words,
                                       not req_only_cloud_store._update_application_to_send_email,
                                       not req_only_cloud_store._update_inform_success_email,
                                       not req_only_cloud_store._update_inform_should_call_email]
        self.assertTrue(all(req_only_boolean_attributes))
 def test_update_get_stop_words_data(self):
     """Upload the stop words file, download it again and compare with the original.

     The backup stop words file is copied to ``stop_words.txt``, uploaded,
     verified to exist in the remote tests folder, moved aside locally and
     then compared with the list returned by ``get_stop_words_data``.
     """
     cloud_store = JobBotDropboxCloudstore(config=self.configuration.get_cloudstores()[0],
                                           remote_files_folder=self.remote_tests_folder)
     # Copy bck to actual file
     bck_stop_words_path = os.path.join(cloud_store.local_files_folder,
                                        'bck_stop_words.txt')
     stop_words_path = os.path.join(cloud_store.local_files_folder,
                                    'stop_words.txt')
     copyfile(bck_stop_words_path, stop_words_path)
     # Upload stop_words
     logger.info('Uploading stop_words..')
     cloud_store.update_stop_words_data()
     # Check if it was uploaded
     self.assertIn('stop_words.txt', cloud_store.ls(self.remote_tests_folder).keys())
     # Rename the old file before downloading it
     logger.info('Renaming the old file before downloading it..')
     original_copy_path = os.path.join(self.test_data_path, self.file_name)
     os.rename(os.path.join(self.test_data_path, 'stop_words.txt'),
               original_copy_path)
     # Download it
     logger.info('Downloading stop_words..')
     stop_words_downloaded = cloud_store.get_stop_words_data()
     # Rebuild the textual list form ("['a', 'b']") that the original file is compared against
     stop_words_downloaded = "['" + "', '".join(stop_words_downloaded) + "']"
     # Compare contents of downloaded file with the original.
     # The context manager makes sure the file handle is closed again.
     with open(original_copy_path, 'rb') as original_file:
         original_bytes = original_file.read()
     self.assertEqual(original_bytes,
                      bytes(stop_words_downloaded, encoding='utf8'))
Exemplo n.º 7
0
def crawl_and_send_loop(lookup_url: str, check_interval: int,
                        crawl_interval: int, anchor_class_name: str,
                        data_store: JobBotMySqlDatastore,
                        cloud_store: JobBotDropboxCloudstore,
                        email_app: GmailEmailApp) -> None:
    """
    The main loop.
    Crawls the ad site for new ads and sends emails where applicable and informs the applicant.
    Loops forever; it only ends when an exception propagates out of it.

    :param lookup_url: forwarded to the crawler's ``get_new_ads``
    :param check_interval: seconds to sleep between two crawl rounds
    :param crawl_interval: forwarded to the crawler's ``get_new_ads``
    :param anchor_class_name: forwarded to the ``XeGrAdSiteCrawler`` constructor
    :param data_store: provides the already handled links/emails and stores sent applications
    :param cloud_store: provides the stop words, the attachments and the email templates
    :param email_app: used to send every email
    """

    ad_site_crawler = XeGrAdSiteCrawler(
        stop_words=cloud_store.get_stop_words_data(),
        anchor_class_name=anchor_class_name)
    attachments_local_paths = [
        os.path.join(cloud_store.local_files_folder, attachment_name)
        for attachment_name in cloud_store.attachments_names
    ]
    # Get the attachments and the email data (subject + html per email type) from the cloudstore
    cloud_store.download_attachments()
    application_to_send_subject, application_to_send_html = \
        cloud_store.get_application_to_send_email_data()
    inform_should_call_subject, inform_should_call_html = \
        cloud_store.get_inform_should_call_email_data()
    inform_success_subject, inform_success_html = \
        cloud_store.get_inform_success_email_data()

    links_checked = [
        row[0] for row in data_store.get_applications_sent(columns='link')
    ]
    logger.info("Waiting for new ads..")
    while True:
        new_ads = list(
            ad_site_crawler.get_new_ads(lookup_url=lookup_url,
                                        ads_checked=links_checked,
                                        crawl_interval=crawl_interval))

        if new_ads:
            # Refresh what has already been handled from the datastore
            links_checked = [
                row[0]
                for row in data_store.get_applications_sent(columns='link')
            ]
            emails_checked = [
                row[0]
                for row in data_store.get_applications_sent(columns='email')
            ]
            for link, email in new_ads:
                already_handled = link in links_checked or (
                    email is not None and email in emails_checked)
                if already_handled:
                    continue

                if email is None:
                    # Email applicant to inform him that he should call manually
                    logger.info(
                        "Link ({}) has no email. Inform the applicant.".
                        format(link))
                    email_app.send_email(
                        subject=inform_should_call_subject,
                        html=inform_should_call_html.format(link=link),
                        to=[email_app.get_self_email()])
                else:
                    # Send application after 1 minute (don't be too cocky)
                    time.sleep(60)
                    logger.info("Sending email to: {}. Ad Link: {}".format(
                        email, link))
                    # The link is supplied both positionally and by name so the template
                    # may use either "{}" / "{0}" or "{link}" like the other templates do.
                    email_app.send_email(
                        subject=application_to_send_subject,
                        html=application_to_send_html.format(link, link=link),
                        to=[email],
                        attachments=attachments_local_paths)

                    # Inform applicant that an application has been sent successfully
                    email_app.send_email(subject=inform_success_subject,
                                         html=inform_success_html.format(
                                             email=email, link=link),
                                         to=[email_app.get_self_email()])

                email_info = {
                    "link": link,
                    "email": email,
                    "sent_on": datetime.datetime.utcnow().isoformat()
                }
                data_store.save_sent_application(email_info)
                # Remember what was just handled so that a duplicate link/email later in
                # this same batch (and the next crawl round) is not contacted again.
                links_checked.append(link)
                if email is not None:
                    emails_checked.append(email)
                logger.info("Waiting for new ads..")

        # Wait check_interval seconds before looking for new ads again
        logger.debug("Sleeping for {check_interval} seconds..".format(
            check_interval=check_interval))
        time.sleep(check_interval)
Exemplo n.º 8
0
def upload_files_to_cloudstore(cloud_store: JobBotDropboxCloudstore):
    """Run every upload/update step of the given cloudstore, in a fixed order.

    :param cloud_store: the cloudstore whose stop words, email data and
                        attachments should be pushed
    """
    upload_steps = (
        'update_stop_words_data',
        'update_application_to_send_email_data',
        'update_inform_should_call_email_data',
        'update_inform_success_email_data',
        'upload_attachments',
    )
    # Each method is looked up right before it is called, one step at a time
    for step_name in upload_steps:
        getattr(cloud_store, step_name)()
 def tearDownClass(cls):
     """Delete the remote tests folder through the first configured cloudstore."""
     dropbox_config = cls.configuration.get_cloudstores()[0]
     JobBotDropboxCloudstore(config=dropbox_config).delete_file(cls.remote_tests_folder)