Exemplo n.º 1
0
    def test_auto_increment(self):
        """Check that two consecutively inserted vacancies get the ids 1 and 2."""
        with session_scope(self.dbms) as session:
            # Both rows are identical except for the posting date.
            shared_fields = {
                'platform': self.platform.name,
                'url': "http://",
                'title': "title",
                'company': "aha",
                'search_topic': 'Java',
                'location': "Wien",
            }
            pending_rows = [
                Vacancies(date=datetime(2019, 1, day), **shared_fields)
                for day in (2, 3)
            ]

            # Flush after each add so the rows are written one after the other.
            for vacancy in pending_rows:
                session.add(vacancy)
                session.flush()

        with session_scope(self.dbms) as session:
            stored_rows = session.query(Vacancies).all()

            self.assertEqual([row.id for row in stored_rows], [1, 2])
            self.assertEqual([row.title for row in stored_rows],
                             ['title', 'title'])
Exemplo n.º 2
0
def load_data_scripture(dbms, platform_registry):
    """Fill the database with the platform and vacancy JSON test fixtures.

    The platform rows are added first, then the KarriereAT and StepStone
    handlers are registered on ``platform_registry`` and the session is
    committed. Afterwards the vacancy rows are added to the same session.
    """
    test_data_dir = "tests/test_data"
    platform_path = os.path.join(ConfigHandler.ROOT_DIR, test_data_dir + "/platform.json")
    vacancies_path = os.path.join(ConfigHandler.ROOT_DIR, test_data_dir + "/vacancies.json")

    with session_scope(dbms=dbms) as session:

        with open(platform_path, 'r') as platform_handle:
            for platform_row in json.load(platform_handle):
                new_platform = Platform(name=platform_row['name'],
                                        base_address=platform_row['base_address'])
                session.add(new_platform)

        for handler_class in (KarriereATHandler, StepStoneHandler):
            platform_registry.register_new_platform(handler_class)

        # Persist the platforms before any vacancy row is added.
        session.commit()
        session.flush()

        with open(vacancies_path, 'r') as vacancies_handle:
            for vacancy_row in json.load(vacancies_handle):
                posting_date = datetime.strptime(vacancy_row['date'], '%Y-%m-%d')
                new_vacancy = Vacancies(platform=vacancy_row['platform'],
                                        search_topic=vacancy_row['search_type'],
                                        date=posting_date,
                                        url=vacancy_row['url'],
                                        company=vacancy_row['company'],
                                        title=vacancy_row['title'],
                                        location="")
                session.add(new_vacancy)
Exemplo n.º 3
0
    def test_database_creation(self):
        """Create the SQLite test database and verify its file, tables and initial data."""
        # Start without a database file left over from an earlier run.
        if os.path.isfile(TEST_DB_PATH):
            os.remove(TEST_DB_PATH)
            self.assertFalse(os.path.isfile(TEST_DB_PATH))

        dbms = DBHandler(DBHandler.SQLITE, db_name=TEST_DB_NAME)
        dbms.create_database_and_tables()

        # The database file has to exist now.
        self.assertTrue(os.path.isfile(TEST_DB_PATH))

        # Both tables have to be present.
        expected_tables = [{"name": "platform"}, {"name": "vacancies"}]
        self.assertEqual(dbms.get_tables_in_database(), expected_tables)

        dbms.load_initial_data()
        with session_scope(dbms) as session:
            platforms = session.query(Platform).all()
            self.assertEqual(len(platforms), 1)

            only_platform = platforms[0]
            self.assertEqual(
                [only_platform.name, only_platform.base_address],
                ['karriere.at', 'www.karriere.at/'])

        # Clean up the database file again.
        os.remove(TEST_DB_PATH)
        self.assertFalse(os.path.isfile(TEST_DB_PATH))
    def _save_vacancy_entries_to_database(self, vacancy_entries: list,
                                          search_topic: str) -> bool:
        """Store every scraped vacancy entry that is not yet in the database.

        Args:
            vacancy_entries: Dictionaries providing the keys ``platform``,
                ``company``, ``url``, ``title``, ``date`` and ``location``.
            search_topic: Search topic stored with each new row; it is also
                part of the duplicate check.

        Returns:
            ``True`` once all entries have been processed.

        Raises:
            Exception: Any error raised while checking or adding entries is
                printed and then re-raised unchanged.
        """
        with session_scope(self.dbms) as session:
            try:
                for entry in vacancy_entries:
                    # One mapping drives both the duplicate check and the
                    # construction of the new row, so the two cannot drift apart.
                    row_values = {
                        'platform': entry['platform'],
                        'company': entry['company'],
                        'url': entry['url'],
                        'title': entry['title'],
                        'search_topic': search_topic,
                        'date': entry['date'],
                        'location': entry['location'],
                    }

                    # first() instead of scalar(): scalar() raises
                    # MultipleResultsFound when several stored rows match,
                    # which would abort the whole save for an entry that
                    # simply exists more than once.
                    existing_row = session.query(Vacancies.id).filter(
                        *(getattr(Vacancies, column) == value
                          for column, value in row_values.items())).first()

                    # If it does not exist, add it
                    if existing_row is None:
                        session.add(Vacancies(**row_values))

            except Exception as e:
                print(
                    f"{self.header}: ERROR: Could not insert new entries! Msg.: {str(e)}"
                )
                raise

        return True
Exemplo n.º 5
0
    def test_insert_row_in_articles(self):
        """Insert a single vacancy and check that it is stored with id 1."""
        vacancy_fields = {
            'platform': self.platform.name,
            'date': datetime(2019, 1, 1),
            'url': "http://",
            'title': "title",
            'company': "aha",
            'search_topic': 'Java',
            'location': "Wien",
        }

        with session_scope(self.dbms) as session:
            session.add(Vacancies(**vacancy_fields))
            session.flush()

        with session_scope(self.dbms) as session:
            stored_rows = session.query(Vacancies).all()

            # Exactly the one inserted row must be present.
            self.assertEqual(len(stored_rows), 1)

            stored = stored_rows[0]
            self.assertEqual([stored.id, stored.platform], [1, 'test.at'])
Exemplo n.º 6
0
    def test_auto_delete_postings_after_x_days(self):
        """Check that the cleanup job removes the posting dated one retention period ago."""
        # Set the retention period to 30 days (might be None due to other tests)
        ConfigHandler.POSTING_RETENTION_IN_DAYS = 30

        today = datetime.now()
        retention = timedelta(days=ConfigHandler.POSTING_RETENTION_IN_DAYS)
        expired_date = today - retention

        with session_scope(self.dbms) as session:
            # One posting from today and one dated a full retention period ago.
            shared_fields = {
                'platform': self.platform.name,
                'url': "http://",
                'title': "title",
                'company': "aha",
                'search_topic': 'Java',
                'location': "Wien",
            }
            postings = [
                Vacancies(date=posting_date, **shared_fields)
                for posting_date in (today, expired_date)
            ]

            for posting in postings:
                session.add(posting)

        self.dbms.cleanup_job_postings_in_database()

        with session_scope(self.dbms) as session:
            remaining = session.query(Vacancies).all()

            # The cleanup must leave exactly one posting ...
            self.assertEqual(len(remaining), 1)

            # ... and it has to be the one from today.
            self.assertEqual(remaining[0].date, today.date())
Exemplo n.º 7
0
    def test_inserting_an_already_existing_vacancy(self):
        """ Test that saving an entry which is already stored does not create a second row """
        # Store the entry directly, bypassing the handler.
        with session_scope(dbms=self.dbms) as session:
            first_entry = self.vacancy_entries[0]
            session.add(Vacancies(**first_entry))

        with session_scope(dbms=self.dbms) as session:
            rows_before = session.query(Vacancies).all()

            # Exactly one row exists before the handler runs
            self.assertEqual(len(rows_before), 1)

        # Saving the same entries again through the handler must not add a row.
        self.karriere_at_handler._save_vacancy_entries_to_database(
            vacancy_entries=self.vacancy_entries,
            search_topic=self.search_topic)

        with session_scope(dbms=self.dbms) as session:
            rows_after = session.query(Vacancies).all()

            # The row count is still 1
            self.assertEqual(len(rows_after), 1)
    def test_create_platform_entries_in_database(self):
        """ Test that one database row is created per registered platform """
        self.platform_registry.create_platform_entries_in_database()

        with session_scope(self.dbms) as session:
            stored_platforms = session.query(Platform).all()
            self.assertEqual(len(stored_platforms), 2)

            # The stored names must match the registry keys, in the same order.
            stored_names = [platform.name for platform in stored_platforms]
            registered_names = list(
                self.platform_registry.registered_platforms.keys())
            self.assertEqual(stored_names, registered_names)
    def tearDownClass(cls):
        """Close the browser (best effort) and delete all platform rows."""
        try:
            # Try to close the browser after all tests have run
            cls.browser_handler.close_browser()

        except Exception:
            # Closing is best effort only. Catching Exception instead of using
            # a bare except keeps KeyboardInterrupt and SystemExit propagating.
            pass

        # Reset Database, delete all platforms
        dbms = DBHandler(DBHandler.SQLITE, db_name=TEST_DB_NAME)

        with session_scope(dbms) as session:
            session.query(Platform).delete()

        super().tearDownClass()
Exemplo n.º 10
0
    def test_saving_a_vacancy_list_to_db(self):
        """ Test that _save_vacancy_entries_to_database stores the given entry unchanged. """
        self.karriere_at_handler._save_vacancy_entries_to_database(
            vacancy_entries=self.vacancy_entries,
            search_topic=self.search_topic)

        with session_scope(dbms=self.dbms) as session:
            stored_rows = session.query(Vacancies).all()
            self.assertEqual(len(stored_rows), 1)

            # Rebuild a plain dictionary from the stored row, leaving out the id column
            stored_row = stored_rows[0]
            stored_values = {
                column.key: getattr(stored_row, column.key)
                for column in Vacancies.__table__.columns
                if column.key != 'id'
            }

            # The stored values must equal the dictionary that was passed in
            self.assertEqual(stored_values, self.vacancy_entries[0])
Exemplo n.º 11
0
 def tearDown(self):
     """Delete every row of the Vacancies table."""
     with session_scope(self.dbms) as session:
         all_vacancies = session.query(Vacancies)
         all_vacancies.delete()
Exemplo n.º 12
0
    def tearDownClass(cls):
        """Delete every row of the Platform table from the SQLite test database."""
        test_dbms = DBHandler(DBHandler.SQLITE, db_name=TEST_DB_NAME)

        with session_scope(test_dbms) as session:
            all_platforms = session.query(Platform)
            all_platforms.delete()
Exemplo n.º 13
0
    def test_disabled_retention_in_days(self):
        """Test if value of "disabled" disables auto-deletion

        Loads ``config_disabled_retention_days.json``, expects
        ``ConfigHandler.POSTING_RETENTION_IN_DAYS`` to become ``None`` and
        checks that the cleanup job then keeps both a current and an old
        posting.
        """
        # Start from a 30 day retention period (might be None due to other tests)
        ConfigHandler.POSTING_RETENTION_IN_DAYS = 30

        today = datetime.now()
        very_old_post_date = today - timedelta(
            days=ConfigHandler.POSTING_RETENTION_IN_DAYS)

        ConfigHandler.CONFIG_PATH = os.path.join(
            ConfigHandler.ROOT_DIR, 'tests', 'test_data', 'config_jsons',
            'config_disabled_retention_days.json')
        ConfigHandler.validate_config_file_base_variables()

        browser_handler = BrowserHandler()
        try:
            browser = browser_handler.get_browser()

            platform_registry = PlatformRegistry(browser=browser, dbms=self.dbms)
            platform_registry.register_new_platform(KarriereATHandler)
            platform_registry.create_platform_entries_in_database()

            ConfigHandler.validate_search_topics(
                platform_registry=platform_registry)
        finally:
            # Close the browser even when the setup above raises, so a failing
            # test does not leave a browser behind.
            browser_handler.close_browser()

        # Check if Posting-retention-in-days was correctly set to None
        self.assertIsNone(ConfigHandler.POSTING_RETENTION_IN_DAYS)

        with session_scope(self.dbms) as session:
            vacancy_list = [
                Vacancies(platform=self.platform.name,
                          date=today,
                          url="http://",
                          title="title",
                          company="aha",
                          search_topic='Java',
                          location="Wien"),
                Vacancies(platform=self.platform.name,
                          date=very_old_post_date,
                          url="http://",
                          title="title",
                          company="aha",
                          search_topic='Java',
                          location="Wien")
            ]

            for v in vacancy_list:
                session.add(v)

        self.dbms.cleanup_job_postings_in_database()

        with session_scope(self.dbms) as session:
            vacancies_instances_list = session.query(Vacancies).all()

            # Auto-Deletion should be disabled - both entries should be present
            self.assertEqual(len(vacancies_instances_list), 2)

            # The stored dates must be those of the two inserted postings
            date_list = [row.date for row in vacancies_instances_list]
            self.assertEqual(
                date_list,
                [today.date(), very_old_post_date.date()])
Exemplo n.º 14
0
    def print_result_to_html(self, seach_topic_list: list, open_html_after_finish: bool = True):
        """Write the stored job postings into the result HTML file.

        The template HTML file is copied to the result path and the generated
        markup is appended to it: a "jump to section" list, followed by one
        section per search topic and platform containing the stored postings.

        Args:
            seach_topic_list: Search topics to print. The misspelled parameter
                name is kept so that keyword callers keep working.
            open_html_after_finish: When true, ``os.system`` is called with
                ``ConfigHandler.RESULT_HTML_FILE_NAME`` after writing.
        """
        # Imported locally so the method does not depend on a module-level import.
        from html import escape

        def as_html(value) -> str:
            # Values come from the database (scraped content) and from the
            # configuration; escape them so they cannot break or inject markup.
            return escape(str(value), quote=True)

        # Copy the template html-file and use it as new result-html-file
        shutil.copy(src=ConfigHandler.TEMPLATE_HTML_PATH, dst=ConfigHandler.RESULT_HTML_PATH)

        with open(ConfigHandler.RESULT_HTML_PATH, 'a') as html_file:

            result_counter = 0

            with session_scope(dbms=self.dbms) as session:
                # Get all platform names (each result row holds just the name column)
                platform_names = [row[0] for row in session.query(Platform.name).all()]

                # Print all platform names in the header, hyper-linking to the respective sections
                if self.verbose:
                    print(f"{self.header}: Printing entries for platforms {str(platform_names)}")

                html_file.write('<div class="jump_to_section"><h6>Jump to section ... </h6><ul>\n')

                for search_topic in seach_topic_list:
                    for platform in platform_names:
                        html_file.write(
                            f'<li><a href="#{as_html(search_topic)}_{as_html(platform)}" class="contents_a">'
                            f'... Search-Topic {as_html(search_topic)} - {as_html(platform)}</a></li>')

                # Close the list exactly once, after all topics. Inside the loop
                # the closing tags were emitted once per topic and never for an
                # empty topic list.
                html_file.write('</ul></div><div class="header_clear_area"></div>\n')

                # Print main body with all entries
                for search_topic in seach_topic_list:
                    html_file.write(f"<h5>Search-Topic '{as_html(search_topic)}'</h5>\n")

                    for platform in platform_names:
                        platform_instance = self.platform_registry.get_platform_instance(platform)

                        if search_topic not in platform_instance.scrape_status:
                            # Each platform could have its own search topic. Only proceed if this search topic
                            # was applied to this platform_instance
                            continue

                        html_file.write(f'<h6 id="{as_html(search_topic)}_{as_html(platform)}">'
                                        f'{as_html(platform)} - Job postings</h6>')
                        html_file.write('<div class="posting-list-wrapper">')

                        if not platform_instance.scrape_status[search_topic]:
                            # If a negative scrape status -> print error-message and jump to next platform.
                            # This has to be an f-string, otherwise a literal "{platform}" is written.
                            html_file.write(f'<p class="error_message message">An error occurred when trying to '
                                            f'scrape entries from the platform {as_html(platform)}</p><br></div>')
                            continue

                        # Get all entries in database for this platform
                        result_rows = session.query(Vacancies)\
                            .filter(Vacancies.platform == platform, Vacancies.search_topic == search_topic).all()

                        if self.verbose:
                            print(result_rows)

                        if not result_rows:
                            html_file.write('<p class="message">No job postings found</p>')

                        for row in result_rows:
                            # The counter gives every posting unique checkbox/label ids
                            result_counter += 1

                            job_item_string = (
                                '<div class="job-item" >'
                                f'<a class="job-posting" href="{as_html(row.url)}" target="_blank" '
                                f'onclick="activateCheckBox(\'job_checkbox_{result_counter}\', '
                                f'\'job_checkbox_label_{result_counter}\', \'outlined\', \'filled\');'
                                'event.stopPropagation();">'
                                '<div class="job-title-column"><div class="arrow-icon"></div>'
                                f'<div class="job-title">{as_html(row.title)}</div>'
                                '</div><div class="job-column">'
                                f'<div class="job-company">{as_html(row.company)}'
                                '</div>'
                                f'<div class="job-date">{as_html(row.date)} - {as_html(row.location)}</div>'
                                '</div>'
                                '</a>'
                                '<div class="job-checkbox">'
                                f'<label for="job_checkbox_{result_counter}" '
                                f'id="job_checkbox_label_{result_counter}" class="checkbox_btn outlined">Checked'
                                # Attributes are separated by whitespace; the
                                # separators after style and onclick were missing.
                                '<input type="checkbox" style="opacity: 0;" '
                                f'onclick="toggleCheckBox(\'job_checkbox_{result_counter}\', '
                                f'\'job_checkbox_label_{result_counter}\', \'outlined\', \'filled\');'
                                '  " '
                                f'id="job_checkbox_{result_counter}" class="badgebox">'
                                '<span class="badge">&nbsp;&check;&nbsp;</span></label>'
                                '</div>'
                                '</div>'
                            )

                            html_file.write(job_item_string)

                        html_file.write('</div><br><br>')

            html_file.write('</body></html>\n')

        if open_html_after_finish:
            # NOTE(review): this runs the file name as a shell command, which
            # presumably relies on the OS file association and on the current
            # working directory - confirm; webbrowser.open would be portable.
            os.system(ConfigHandler.RESULT_HTML_FILE_NAME)