Exemplo n.º 1
0
def main():
    """Smoke-test the Logger: print ``_timestamp()`` and write one entry per method.

    The log file is ``dir_log + "test.log"``; a plain ``_log`` entry is written
    first, followed by ``info``, ``error`` and ``warning`` entries in that order.
    """
    demo_logger = Logger(dir_log + "test.log")

    stamp = demo_logger._timestamp()
    print(stamp)

    demo_logger._log('일반적인 설명문장을 로그에 기록 합니다.')

    # Each level method is looked up right before it is called, one after another.
    leveled_messages = (
        ('info', '정보에 관한 로그문장을 로그에 기록 합니다.'),
        ('error', '에러발생시 에러메시지를 로그에 기록 합니다.'),
        ('warning', '경고 발생시 메시지를 로그에 기록 합니다.'),
    )
    for level, text in leveled_messages:
        getattr(demo_logger, level)(text)
def process_event(event, context):
    """Validate ``event`` and hand it to ``Processor.write_to_kinesis``.

    Returns:
        ``ok()`` when validation and the write both succeed,
        ``nok(HTTP_BAD_REQUEST)`` when a ``JsonSchemaException`` is raised,
        ``nok(HTTP_TOO_MANY_REQUESTS)`` when a ``ClientError`` is raised.
    """
    event_logger = Logger(event)
    handler = Processor(event, context, event_logger)

    try:
        handler.validate()
        handler.write_to_kinesis()
        event_logger.debug('Successfully converted to json',
                           payload=handler.payload)
        response = ok()
    except JsonSchemaException as schema_error:
        # The payload did not satisfy the schema.
        event_logger.error('Validation error: {}'.format(str(schema_error)))
        response = nok(HTTP_BAD_REQUEST)
    except ClientError as kinesis_error:
        # The Kinesis client reported a failure.
        event_logger.error('Client error: {}'.format(str(kinesis_error)))
        response = nok(HTTP_TOO_MANY_REQUESTS)

    return response
Exemplo n.º 3
0
def phase_start_time(logger: Logger, log_prefix: str, index: int,
                     data: str) -> Optional[datetime]:
    '''Extract the start time of a phase from the head of its log text.

    Only the first 250 characters of ``data`` are searched.

    Args:
        logger: receives an error entry on failure and a debug entry on success.
        log_prefix: text placed at the start of every log entry.
        index: identifier included in every log entry.
        data: log text expected to contain a date such as
            ``Sun Apr 25 16:59:09 2021`` preceded by three characters and a space.

    Returns:
        The parsed (naive) datetime, or None when no date is found or the
        matched text is not a valid date.
    '''

    # dates look like:
    #   Sun Apr 25 16:59:09 2021
    #   Sat May  1 03:10:43 2021
    #   1   2  3 4 5  6  7  8 <- group
    expression = r'... (\w{3}) (\w{3})(\s{1,2})(\d+) (\d+):(\d+):(\d+) (\d+)'

    res = re.search(expression, data[:250])
    if not res:
        logger.error(f'{log_prefix} index {index} failed to extract date')
        return None

    # A successful match always carries all eight groups; the weekday (1) and
    # the day padding (3) are not needed to build the timestamp.
    _, month, _, day, hour, minute, second, year = res.groups()

    date_string: str = f'{year}-{month}-{day} {hour}:{minute}:{second}'
    date_format: str = '%Y-%b-%d %H:%M:%S'
    try:
        dt = datetime.strptime(date_string, date_format)
    except ValueError:
        # The pattern only checks shape (e.g. "Foo 99" passes), so the text may
        # still not be a real date; report it like any other extraction failure.
        logger.error(
            f'{log_prefix} index {index} failed to parse date {date_string!r}')
        return None

    logger.debug(f'{log_prefix} index {index} start time {dt}')

    return dt
Exemplo n.º 4
0
class CyBot:
    """Drive a Chrome session through Cyworld and hand post URLs to worker processes.

    ``init``, ``login`` and ``home`` return ``self`` on success so they can be
    chained before calling ``run``.
    """

    def __init__(self, chromedriver, wait=5, delay=3):
        """Start Chrome and remember the crawl settings.

        Args:
            chromedriver: passed to ``webdriver.Chrome`` and later to each ``Parser``.
            wait: value used for the driver's implicit wait and for ``WebDriverWait``.
            delay: seconds to sleep once the "more" button has become clickable.
        """
        self._logger = Logger('cybot.log')

        self._logger.info('크롬 드라이버 로딩 중..')
        driver = webdriver.Chrome(chromedriver)
        driver.implicitly_wait(wait)
        self._logger.info('크롬 드라이버 로딩 완료')

        self._chromedriver = chromedriver
        self._base_url = 'https://cy.cyworld.com'
        self._user_id = ''
        self._wait_time = wait
        self._delay = delay
        self._driver = driver
        self._wait = WebDriverWait(driver, wait)

    def init(self):
        """Open the Cyworld landing page and return ``self``."""
        self._logger.info('싸이월드 홈페이지 접속 중..')
        # Open the Cyworld page.
        self._driver.get('https://cyworld.com')
        self._logger.success('싸이월드 홈페이지 접속 완료')
        return self

    def login(self, user_email, user_password):
        """Fill in and submit the login form, then wait for the outcome.

        Returns ``self`` when the resulting URL contains ``'timeline'``.
        On a wait failure or any other resulting URL an error is logged and
        the interpreter is terminated with ``exit()``.
        """
        self._logger.info('로그인 시도 중..')

        prev_url = self._driver.current_url
        self._driver.find_element_by_name('email').send_keys(user_email)
        self._driver.find_element_by_name('passwd').send_keys(user_password, Keys.RETURN)

        try:
            self._wait.until(EC_or(
                EC.url_changes(prev_url),
                EC.invisibility_of_element(
                    (By.CSS_SELECTOR, '.ui-dialog.ui-widget.ui-widget-content.ui-corner-all.ui-front.ui-draggable.ui-resizable'))
            ))
        except Exception:
            # ``Exception`` rather than a bare except, so Ctrl-C is not
            # reported as a timeout.
            self._logger.error('시간이 초과되었습니다')
            exit()

        url = self._driver.current_url
        if 'timeline' in url:
            self._logger.success('로그인 성공')
            return self
        else:
            self._logger.error('사용자 정보를 확인해주세요')
            exit()

    def home(self):
        """Record the user id from the profile link and navigate to "my home".

        Returns ``self`` once the URL has changed to one containing ``'home'``;
        otherwise logs an error and terminates with ``exit()``.
        """
        self._logger.info('마이 홈으로 이동 중..')

        prev_url = self._driver.current_url

        # Extract the user's unique id (last path segment of the profile link).
        profile = self._driver.find_element_by_css_selector('a.freak1')
        self._user_id = profile.get_attribute('href').split('/').pop()

        # Click the profile picture area.
        self._driver.find_element_by_id('imggnbuser').click()

        try:
            self._wait.until(EC.url_changes(prev_url))
        except Exception:
            self._logger.error('시간이 초과되었습니다')
            exit()

        if 'home' not in self._driver.current_url:
            self._logger.error('마이 홈으로 이동할 수 없습니다')
            exit()

        self._logger.success('이동 완료')
        return self

    def feeder(self, content_list, running):
        """Append every timeline post URL to ``content_list``, paging with the "more" button.

        Args:
            content_list: list-like object that receives one layer URL per post.
            running: object whose ``value`` is set to 0 when feeding has finished.

        The browser window is closed when the loop ends.
        """
        content_index = 0

        # Collect all timeline content entries.
        while self._driver.find_element_by_css_selector('p.btn_list_more'):
            # Skip the entries that earlier iterations already handled.
            contents = self._driver.find_elements_by_css_selector(
                'input[name="contentID[]"]')[content_index:]

            for content in contents:
                cid = content.get_attribute('value')
                content_url = '{}/home/{}/post/{}/layer' \
                    .format(self._base_url, self._user_id, cid)
                self._logger.info('Feeder::', content_url)
                content_list.append(content_url)
                content_index += 1

            # Wait for the "more" button.
            try:
                next_button = self._wait.until(
                    EC.element_to_be_clickable(
                        (By.CSS_SELECTOR, 'p.btn_list_more'))
                )
                time.sleep(self._delay)
            except Exception:
                # The button never became clickable. Stop here instead of
                # falling through with ``next_button`` unbound (first page) or
                # pointing at the element from the previous page.
                break

            # Stop when the button cannot be clicked (i.e. this is the last page).
            if not (next_button.is_displayed() and next_button.is_enabled()):
                break

            # Click the button to load the next page.
            next_button.click()

        running.value = 0
        self._driver.close()
        self._logger.info('Feeder:: 종료')

    def run(self, parser=2, downloader=2):
        """Start the worker processes, feed them, and wait until they finish.

        Args:
            parser: number of ``Parser`` processes to start.
            downloader: number of ``Downloader`` processes to start.
        """
        self._logger.info('이미지 다운로드 작업 시작')
        start = time.time()

        # Manager that provides the objects shared between processes.
        with Manager() as manager:
            # Started worker processes.
            processes = []

            # Shared state.
            content_list = manager.list()
            image_list = manager.list()
            count = manager.Value('i', 0)
            lock = manager.Lock()
            feeder_running = manager.Value('i', 1)
            parser_running = manager.Value('i', 1)

            parser_logger = Logger('cybot_parser.log')
            downloader_logger = Logger('cybot_downloader.log')

            # Only the name/value pair of each browser cookie is handed to the parsers.
            cookie = [
                {'name': c['name'], 'value': c['value']}
                for c in self._driver.get_cookies()
            ]

            # Create and start the parser processes.
            for idx in range(parser):
                parser_instance = Parser(
                    self._chromedriver,
                    cookie,
                    parser_logger,
                    self._wait_time,
                    self._delay
                )
                parser_process = Process(
                    target=parser_instance.parse,
                    args=(
                        content_list,
                        image_list,
                        feeder_running,
                        parser_running
                    )
                )
                parser_process.name = 'Parser::' + str(idx)
                parser_process.start()
                processes.append(parser_process)
                self._logger.info('Parser', str(idx), '프로세스 시작')

            # Create and start the downloader processes.
            for idx in range(downloader):
                downloader_instance = Downloader(downloader_logger)
                downloader_process = Process(
                    target=downloader_instance.download,
                    args=(image_list, count, lock, parser_running))
                downloader_process.name = 'Downloader::' + str(idx)
                downloader_process.start()
                processes.append(downloader_process)
                self._logger.info('Downloader', str(idx), '프로세스 시작')

            # Run the feeder in this process.
            self._logger.info('Feeder 시작')
            self.feeder(content_list, feeder_running)

            # Wait for any parser/downloader process that is still running.
            for p in processes:
                p.join()

            self._logger.info('작업 소요시간: {}초'
                              .format(round(time.time() - start, 2)))
            self._logger.info('전체 이미지 수: {}'.format(count.value))
Exemplo n.º 5
0
from src.conf import Config
from src.logger import Logger
from sqlalchemy.ext.declarative import declarative_base
from geopy.geocoders import Nominatim
from sqlalchemy import exc
import pymysql

# Relative path of the JSON file handed to Config below.
JSON_FILE_NAME = "src/mining_constants.json"

# Module-level objects created as a side effect of importing this module.
logger = Logger().logger
config = Config(JSON_FILE_NAME)

# mapper & MetaData: maps the subclass to the table and holds all the information about the database
Base = declarative_base()
# NOTE(review): this import sits after Base is created — presumably because
# database.database imports Base from this module; confirm before moving it.
from database.database import Database

try:
    # Both objects are created at import time; the handlers below cover the
    # two failure modes that are reported and then end the program.
    database = Database()
    geolocator = Nominatim(user_agent=f"{config.DB_NAME}", timeout=3)

except exc.NoSuchModuleError as err:
    # Printed to stdout (not logged) together with the configured
    # SQL_EXTENSION / PYTHON_DBAPI values, then the interpreter exits.
    print(
        err._message(),
        f"\tinput: sql extension= {config.SQL_EXTENSION}, python DBAPI= {config.PYTHON_DBAPI}"
    )
    exit()

except pymysql.err.OperationalError as err:
    # err.args[1] is formatted into config.CONNECTION_ERROR and logged
    # before the interpreter exits.
    logger.error(config.CONNECTION_ERROR.format(err.args[1]))
    exit()
Exemplo n.º 6
0
class CyBot:
    """Drive a Chrome session through Cyworld and hand post URLs to worker processes.

    ``init``, ``login`` and ``home`` return ``self`` on success and ``None``
    after calling the ``onerror`` callback on failure.
    """

    __VERSION__ = '1.0.2'

    def __init__(self, chromedriver, wait=5, delay=3,
                 headless=False, onlog=None, onerror=exit, done=exit):
        """Store the crawl settings; the browser itself is started by ``init``.

        Args:
            chromedriver: passed to ``webdriver.Chrome`` and to each ``Parser``.
            wait: value used for the driver's implicit wait and for ``WebDriverWait``.
            delay: seconds to sleep once the "more" button has become clickable.
            headless: when true, Chrome is started with headless options.
            onlog: optional callable given to the logger and called with
                progress messages from ``feeder``.
            onerror: callable invoked (without arguments) after a failure is logged.
            done: callable invoked (without arguments) at the end of ``run``.
        """
        self._logger = Logger('cybot.log', callback=onlog)

        self._chromedriver = chromedriver
        self._base_url = 'https://cy.cyworld.com'
        self._user_id = ''
        self._wait_time = wait
        self._delay = delay
        self._headless = headless
        self._onlog = onlog
        self._onerror = onerror
        self._done = done
        self._options = None
        self._driver = None
        self._wait = None

    def init(self):
        """Start Chrome, open the Cyworld landing page and return ``self``.

        Returns ``None`` after calling ``onerror`` when the driver cannot be created.
        """
        self._logger.info('크롬 드라이버 로딩 중..')
        try:
            options = webdriver.ChromeOptions()
            if self._headless:
                options.add_argument('headless')
                options.add_argument('window-size=800x600')
                options.add_argument("disable-gpu")
                options.add_argument('log-level=3')

            options.add_argument('--ignore-certificate-errors')
            options.add_argument('--ignore-ssl-errors')
            driver = webdriver.Chrome(self._chromedriver,
                                      chrome_options=options)
            driver.implicitly_wait(self._wait_time)
        except Exception as e:
            self._logger.error('크롬 드라이버 로딩 실패', detail=e)
            self._onerror()
            return

        self._options = options
        self._driver = driver
        self._wait = WebDriverWait(driver, self._wait_time)
        self._logger.info('크롬 드라이버 로딩 완료')

        # Open the Cyworld page.
        self._logger.info('싸이월드 홈페이지 접속 중..')
        self._driver.get('https://cyworld.com')
        self._logger.success('싸이월드 홈페이지 접속 완료')
        return self

    def login(self, user_email, user_password):
        """Fill in and submit the login form, then wait for the outcome.

        Returns ``self`` when the resulting URL contains ``'timeline'``;
        otherwise logs an error, calls ``onerror`` and returns ``None``.
        """
        self._logger.info('로그인 시도 중..')

        prev_url = self._driver.current_url
        try:
            self._driver.find_element_by_name('email').send_keys(user_email)
            password_field = self._driver.find_element_by_name('passwd')
            # The password is passed as a script argument rather than being
            # formatted into the script text, so quotes or backslashes in it
            # can neither break nor alter the JavaScript.
            self._driver.execute_script(
                'arguments[0].value = arguments[1]',
                password_field, user_password)
            password_field.send_keys(Keys.RETURN)
        except Exception as e:
            self._logger.error('알 수 없는 오류가 발생했습니다', detail=e)
            self._onerror()
            return None

        time.sleep(3)

        try:
            # Joined without separators: one compound selector for an element
            # carrying all of these classes.
            selectors = [
                '.ui-dialog', '.ui-widget', '.ui-widget-content',
                '.ui-corner-all', '.ui-front', '.ui-draggable', '.ui-resizable'
            ]
            self._wait.until(EC_or(
                EC.url_changes(prev_url),
                EC.invisibility_of_element(
                    (By.CSS_SELECTOR, ''.join(selectors)))
            ))
        except Exception as e:
            self._logger.error('시간이 초과되었습니다', detail=e)
            self._onerror()
            return None

        url = self._driver.current_url
        if 'timeline' in url:
            self._logger.success('로그인 성공')
            return self
        elif 'pwd' in url.lower():
            # Clicking a.next would choose "change later"; this is left to the
            # user because it concerns their personal information.
            self._logger.error('싸이월드에 직접 로그인하여 비밀번호 변경 페이지를 확인한 후 다시 시도해주세요')
            self._onerror()
            return None
        else:
            self._logger.error('사용자 정보를 확인해주세요')
            self._onerror()
            return None

    def home(self):
        """Record the user id from the profile link and navigate to "my home".

        Returns ``self`` once the URL has changed to one containing ``'home'``;
        otherwise logs an error, calls ``onerror`` and returns ``None``.
        """
        self._logger.info('마이 홈으로 이동 중..')

        prev_url = self._driver.current_url

        # Extract the user's unique id (last path segment of the profile link).
        profile = self._driver.find_element_by_css_selector('a.freak1')
        self._user_id = profile.get_attribute('href').split('/').pop()

        # Click the profile picture area.
        self._driver.find_element_by_id('imggnbuser').click()

        try:
            self._wait.until(EC.url_changes(prev_url))
        except Exception as e:
            self._logger.error('시간이 초과되었습니다', detail=e)
            self._onerror()
            return None

        if 'home' not in self._driver.current_url:
            self._logger.error('마이 홈으로 이동할 수 없습니다')
            self._onerror()
            return None

        self._logger.success('이동 완료')
        return self

    def feeder(self, content_list, running):
        """Append every timeline post URL to ``content_list``, paging with the "more" button.

        Args:
            content_list: list-like object that receives one layer URL per post.
            running: object whose ``value`` is set to 0 when feeding has finished.

        The browser window is closed when the loop ends, and ``onlog`` (if
        set) receives progress messages and a final summary.
        """
        content_index = 0

        # Collect all timeline content entries.
        while self._driver.find_element_by_css_selector('p.btn_list_more'):
            # Skip the entries that earlier iterations already handled.
            contents = self._driver.find_elements_by_css_selector(
                'input[name="contentID[]"]')[content_index:]

            for content in contents:
                cid = content.get_attribute('value')
                content_url = '{}/home/{}/post/{}/layer' \
                    .format(self._base_url, self._user_id, cid)
                self._logger.info('Feeder::', content_url, callback=False)

                if self._onlog:
                    self._onlog('{}개의 게시물 다운로드 중..'.format(content_index + 1))

                content_list.append(content_url)
                content_index += 1

            # Wait for the "more" button.
            try:
                next_button = self._wait.until(
                    EC.element_to_be_clickable(
                        (By.CSS_SELECTOR, 'p.btn_list_more')))
                time.sleep(self._delay)
            except Exception:
                # The button never became clickable. Stop here instead of
                # falling through with ``next_button`` unbound (first page) or
                # pointing at the element from the previous page.
                break

            # Stop when the button cannot be clicked (i.e. this is the last page).
            if not (next_button.is_displayed() and next_button.is_enabled()):
                break

            # Click the button to load the next page.
            next_button.click()

        running.value = 0
        self._driver.close()
        self._logger.info('Feeder:: 종료', callback=False)

        if self._onlog:
            self._onlog(
                '총 {}개의 게시물이 확인되었습니다.\n다운로드가 완료될 때 까지 잠시만 기다려주세요'.format(
                    content_index))

    def run(self, parser=2, downloader=2):
        """Start the worker processes, feed them, wait for them, then call ``done``.

        Args:
            parser: number of ``Parser`` processes to start.
            downloader: number of ``Downloader`` processes to start.
        """
        self._logger.info('이미지 다운로드 작업 시작')
        start = time.time()

        # Manager that provides the objects shared between processes.
        with Manager() as manager:
            # Started worker processes.
            processes = []

            # Shared state.
            content_list = manager.list()
            image_list = manager.list()
            count = manager.Value('i', 0)
            lock = manager.Lock()
            feeder_running = manager.Value('i', 1)
            parser_running = manager.Value('i', 1)

            parser_logger = Logger('cybot_parser.log')
            downloader_logger = Logger('cybot_downloader.log')

            # Only the name/value pair of each browser cookie is handed to the parsers.
            cookie = [
                {'name': c['name'], 'value': c['value']}
                for c in self._driver.get_cookies()
            ]

            # Create and start the parser processes.
            for idx in range(parser):
                parser_instance = Parser(self._chromedriver, cookie,
                                         parser_logger, self._wait_time,
                                         self._delay, self._headless,
                                         self._options)
                parser_process = Process(target=parser_instance.parse,
                                         args=(content_list, image_list,
                                               feeder_running, parser_running),
                                         daemon=True)
                parser_process.name = 'Parser::' + str(idx)
                parser_process.start()
                processes.append(parser_process)
                self._logger.info('Parser', str(idx), '프로세스 시작')

            # Create and start the downloader processes.
            for idx in range(downloader):
                downloader_instance = Downloader(downloader_logger)
                downloader_process = Process(
                    target=downloader_instance.download,
                    args=(image_list, count, lock, parser_running),
                    daemon=True)
                downloader_process.name = 'Downloader::' + str(idx)
                downloader_process.start()
                processes.append(downloader_process)
                self._logger.info('Downloader', str(idx), '프로세스 시작')

            # Run the feeder in this process.
            self._logger.info('Feeder 시작')
            self.feeder(content_list, feeder_running)

            # Wait for any parser/downloader process that is still running.
            for p in processes:
                p.join()

            self._logger.info('작업 소요시간: {}초'
                              .format(round(time.time() - start, 2)),
                              callback=False)
            self._logger.info('전체 이미지 수: {}'.format(count.value),
                              callback=False)
            self._done()
Exemplo n.º 7
0
class OrderData:
    """Import orders from a semicolon-separated file in the ``watch`` directory."""

    # Number of leading lines discarded before the header row is read.
    SKIP_ROWS = 4

    def __init__(self, fileName):
        """Remember the file name and create the database and logger objects.

        Args:
            fileName: name of the file inside ``<cwd>/watch/``.
        """
        self.fileName = fileName
        self.database = Database()
        self.logger = Logger()
        self.invalidOrders = 0

    def process(self):
        """Read the file, group its rows into orders and import them.

        A row with a non-empty ``Totaalprijs`` column starts a new order; the
        rows that follow with an empty value are further lines of that order.
        Every row gets its running row counter appended as the last element.
        """
        self.logger.info('Starting OrderData import')
        with open(os.getcwd() + "/watch/" + self.fileName,
                  encoding='utf-8') as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=';')
            for _ in range(self.SKIP_ROWS):
                next(csv_reader)
            headers = next(csv_reader)
            orders = []

            rowCount = self.SKIP_ROWS
            currentOrder = []
            for order in csv_reader:
                rowCount += 1
                if not order:
                    # Blank line: counted, but not part of any order.
                    continue
                startsNewOrder = order[headers.index('Totaalprijs')] != ''
                if startsNewOrder and currentOrder:
                    orders.append(currentOrder)
                    currentOrder = []
                currentOrder.append(order + [rowCount])

            # The last order has no following row to close it, so it must be
            # flushed here; otherwise the final order of the file is lost.
            if currentOrder:
                orders.append(currentOrder)

            start = time.time()
            self.logger.info("Processing {} orders".format(len(orders)))
            self.createOrderObj(headers, orders)

            self.logger.info('Import completed in: {}'.format(time.time() -
                                                              start))
            self.logger.info('Invalid orders: {}'.format(self.invalidOrders))

    def createOrderObj(self, headers, orders):
        """Build an ``Order.Order`` with its lines for every grouped order and store it.

        Args:
            headers: header row; column positions are looked up by name in it.
            orders: list of orders, each a list of rows as produced by ``process``
                (order-level fields are read from the first row of each order).
        """
        for order in orders:
            restaurantName = order[0][headers.index('Winkelnaam')]
            restaurantId = Restaurant.getStoreByName(self.database,
                                                     restaurantName)
            customerName = order[0][headers.index('Klantnaam')]
            customerPhoneNr = order[0][headers.index('TelefoonNr')]
            customerEmail = order[0][headers.index('Email')]
            address = order[0][headers.index('Adres')]
            city = order[0][headers.index('Woonplaats')]
            addressId = Address.getOrCreateAddress(self.database, address,
                                                   city)

            customerId = Customer.createOrUpdateCustomer(
                self.database, customerEmail, customerName, customerPhoneNr,
                addressId)
            orderDate = parseDate(order[0][headers.index('Besteldatum')])

            # 'Bezorgen' -> True, 'Afhalen' -> False, anything else -> None.
            deliveryTypeString = order[0][headers.index('AfleverType')]
            if deliveryTypeString == 'Bezorgen':
                deliveryType = True
            elif deliveryTypeString == 'Afhalen':
                deliveryType = False
            else:
                deliveryType = None

            deliveryDate = parseDate(order[0][headers.index('AfleverDatum')])
            deliveryTime = order[0][headers.index('AfleverMoment')]
            # Accept H:MM and HH:MM for hours 0-23. The previous pattern began
            # with the malformed class "[0[0-9]", which rejected times such as
            # "09:30" and accepted "[:30".
            if re.match(r'^([01]?[0-9]|2[0-3]):[0-5][0-9]$', deliveryTime):
                hour, minute = deliveryTime.split(':')
                deliveryDate = deliveryDate.replace(hour=int(hour),
                                                    minute=int(minute))
            totalPrice = priceToFloat(order[0][headers.index('Totaalprijs')])
            couponName = order[0][headers.index('Gebruikte Coupon')].strip()
            if len(couponName) > 0:
                couponId = Coupon.createCouponIfNotExists(
                    self.database, couponName)
            else:
                couponId = None
            couponDiscount = priceToFloat(
                order[0][headers.index('Coupon Korting')])
            paymentAmount = priceToFloat(order[0][headers.index('Te Betalen')])

            orderObj = Order.Order(restaurantId,
                                   customerId,
                                   deliveryType,
                                   couponDiscount,
                                   orderDate,
                                   deliveryDate,
                                   couponId,
                                   rowNumber=order[0][-1])

            for orderRow in order:
                productName = orderRow[headers.index('Product')].strip()
                crustName = orderRow[headers.index('PizzaBodem')]
                crustId = Product.getPizzaCrustByName(self.database, crustName)
                sauceName = orderRow[headers.index('PizzaSaus')]
                sauceId = Product.getSauceByName(self.database, sauceName)
                # A line without crust and sauce is looked up as an "other"
                # product; every other line is looked up as a pizza.
                if crustName == '' and sauceName == '':
                    otherProductId = Product.getOtherProductIdByName(
                        self.database, productName)
                    pizzaId = None
                else:
                    pizzaId = Product.getPizzaIdByName(self.database,
                                                       productName)
                    otherProductId = None
                price = priceToFloat(orderRow[headers.index('Prijs')])
                deliveryCosts = priceToFloat(
                    orderRow[headers.index('Bezorgkosten')])
                amount = int(orderRow[headers.index('Aantal')])
                extraIngredientString = orderRow[headers.index(
                    'Extra Ingrediënten')]
                if len(extraIngredientString.strip()) > 0:
                    extraIngredients = [
                        Product.getIngredientByName(self.database,
                                                    ingredient.strip())
                        for ingredient in extraIngredientString.split(',')
                    ]
                else:
                    extraIngredients = []
                priceExtraIngredients = priceToFloat(
                    orderRow[headers.index('Prijs Extra Ingrediënten')])
                orderRowPrice = priceToFloat(
                    orderRow[headers.index('Regelprijs')])

                orderLine = Order.OrderLine(orderRowPrice,
                                            amount,
                                            extraIngredients,
                                            sauceId,
                                            crustId,
                                            pizzaId,
                                            otherProductId,
                                            rowNumber=orderRow[-1])

                orderObj.addOrderLine(orderLine)

            self.createOrder(orderObj)

    def createOrder(self, orderObj):
        """Store a valid order with its lines, or log why it is invalid.

        An invalid order increments ``invalidOrders``; each of its invalid
        lines is logged separately.
        """
        orderValid, orderErrors = orderObj.isValid()
        if orderValid:
            orderId = Order.createOrder(self.database, orderObj)
            for orderLine in orderObj.orderLines:
                Order.createOrderLine(self.database, orderLine, orderId)
        else:
            self.logger.error('Order not valid row: {}, error: {}'.format(
                orderObj.rowNumber, ', '.join(orderErrors)))

            for row in orderObj.orderLines:
                rowValid, rowErrors = row.isValid()
                if not rowValid:
                    self.logger.error(
                        'Order line not valid row: {}, error: {}'.format(
                            row.rowNumber, ', '.join(rowErrors)))

            self.invalidOrders += 1