Exemplo n.º 1
0
import os
import logging

from pyquery import PyQuery as pq
from urllib import parse
from lxml import etree

from utils import prepare_logs_dir

# Logging setup. prepare_logs_dir() runs first (presumably it creates the
# "logs" directory -- confirm in utils); records then go to logs/parser.log.
prepare_logs_dir()
logging.basicConfig(
    datefmt='%d-%m-%y %H:%M',
    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
    level=logging.INFO,
    filename='logs/parser.log',
)


class ExoneParser:
    """
    Parser for the Exone website.
    """
    # Upper bound on the number of attempts made to obtain a response
    # when the server answers with an error.
    ATTEMPTS_COUNT = 3

    # XML export settings: output file name and the directory it lives in.
    OUTPUT_FILENAME = "vacancies.xml"
    OUTPUT_DIR = "parsed_xml"

    # Directory containing this module, and the export directory beneath it.
    CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
    DIR_TO_EXPORT = os.path.join(CURRENT_DIR, OUTPUT_DIR)
Exemplo n.º 2
0
from unittest.mock import patch
from utils import prepare_logs_dir

# Directory containing this test module; every path below is built from it.
CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))

# Bare names of the fixture directory, the log directory and the fixtures.
TEST_DATA_DIR_NAME = 'data'
LOGS_DIR = 'logs'
LIST_PAGE_FILENAME = 'test_list.html'
VACANCY_PAGE_FILENAME = 'test_vacancy.html'

# Absolute locations derived from the names above.
TEST_DATA_DIR = os.path.join(CURRENT_DIR, TEST_DATA_DIR_NAME)
LOGS_PATH = os.path.join(CURRENT_DIR, LOGS_DIR)
LIST_PAGE_FILEPATH = os.path.join(TEST_DATA_DIR, LIST_PAGE_FILENAME)
VACANCY_PAGE_FILEPATH = os.path.join(TEST_DATA_DIR, VACANCY_PAGE_FILENAME)

prepare_logs_dir(LOGS_PATH)


class ParserTestCase(unittest.TestCase):
    """
    Test case for the parser.
    """
    # One sample vacancy record, kept as a class-level fixture.
    TEST_LIST = [
        {
            'vacancy_type': 'Vollzeit',
            'vacancy_location': 'Giengen-Sachsenhausen',
            'vacancy_title': 'Elektronik-Montierer/in',
            'vacancy_url': (
                'https://www.exone.de/jm/web/tool/jobmanager/'
                'apply.php?sttyp=1&arst=detail&id=85'
            ),
            'vacancy_id': '85',
        },
    ]
import grequests as grq
import requests as rq
import urllib3

from datetime import datetime
from urllib import parse
from lxml import etree
from pyquery import PyQuery as pq
from fake_useragent import UserAgent

from utils import prepare_logs_dir

# Suppress urllib3's InsecureRequestWarning.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# prepare_logs_dir() is invoked before file-based logging is configured.
prepare_logs_dir()

logging.basicConfig(
    filename='logs/parser.log',
    level=logging.INFO,
    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
    datefmt='%d-%m-%y %H:%M',
)


def progress(count, total, status=''):
    """
    Console progress bar

    :param count: current progress value (numerator of the ratio)
    :param total: value representing completion; used as the divisor,
        so it must be non-zero
    :param status: optional text; not referenced in the lines visible here
    """
    # NOTE(review): the visible body only computes values; no output or
    # return statement appears in this excerpt.
    # width of the bar in characters
    bar_len = 50
    # number of bar cells drawn as filled, proportional to count / total
    filled_len = int(round(bar_len * count / float(total)))

    # completion percentage, rounded to one decimal place
    percents = round(100.0 * count / float(total), 1)
    # '=' for the filled part, '-' padding up to bar_len
    bar = '=' * filled_len + '-' * (bar_len - filled_len)
from utils import prepare_logs_dir

# Everything below is resolved relative to the directory of this module.
CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))

# Log directory handed to prepare_logs_dir() at the end of this section.
LOGS_DIR = 'logs'
LOGS_PATH = os.path.join(CURRENT_DIR, LOGS_DIR)

# Fixture directory.
TEST_DATA_DIR_NAME = 'data'
TEST_DATA_DIR = os.path.join(CURRENT_DIR, TEST_DATA_DIR_NAME)

# Fixture files stored inside the fixture directory.
VACANCY_PAGE_FILENAME = 'test_vacancy.html'
VACANCY_PAGE_FILEPATH = os.path.join(TEST_DATA_DIR, VACANCY_PAGE_FILENAME)
LIST_PAGE_FILENAME = 'vacancy_list.json'
LIST_PAGE_FILEPATH = os.path.join(TEST_DATA_DIR, LIST_PAGE_FILENAME)

prepare_logs_dir(LOGS_PATH)


def get_test_data():
    """
    Load test data from file

    Reads the JSON document at LIST_PAGE_FILEPATH. A missing file is
    reported on stdout instead of raising.

    :return: json test data, or None when the file does not exist
    """
    try:
        # Explicit encoding: JSON text is UTF-8, while the platform default
        # used by open() may be something else (e.g. cp1252 on Windows).
        with open(LIST_PAGE_FILEPATH, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # Attempting the open directly avoids a separate existence check
        # that could go stale before the file is actually opened.
        print("File with test data not found")
        return None


class ParserTestCase(unittest.TestCase):