def main():
    """Exercise the Logger by writing one message of each kind to test.log.

    Prints the logger's current timestamp, then writes a plain message
    followed by an info, an error and a warning message.
    """
    log_path = dir_log + "test.log"
    logger = Logger(log_path)

    # Show the timestamp the logger would currently produce.
    current_stamp = logger._timestamp()
    print(current_stamp)

    # One call per supported log method.
    logger._log('일반적인 설명문장을 로그에 기록 합니다.')
    logger.info('정보에 관한 로그문장을 로그에 기록 합니다.')
    logger.error('에러발생시 에러메시지를 로그에 기록 합니다.')
    logger.warning('경고 발생시 메시지를 로그에 기록 합니다.')
def process_event(event, context):
    """Validate an incoming event and forward it to Kinesis.

    Returns the result of ``ok()`` when validation and the Kinesis write
    both succeed, ``nok(HTTP_BAD_REQUEST)`` when a JsonSchemaException is
    raised, and ``nok(HTTP_TOO_MANY_REQUESTS)`` when a ClientError is raised.
    """
    log = Logger(event)
    handler = Processor(event, context, log)

    try:
        handler.validate()
        handler.write_to_kinesis()
        log.debug('Successfully converted to json', payload=handler.payload)
        response = ok()
        return response
    except JsonSchemaException as schema_failure:
        # The payload did not satisfy the JSON schema.
        message = 'Validation error: ' + str(schema_failure)
        log.error(message)
        return nok(HTTP_BAD_REQUEST)
    except ClientError as kinesis_failure:
        # The Kinesis client reported an error.
        message = 'Client error: ' + str(kinesis_failure)
        log.error(message)
        return nok(HTTP_TOO_MANY_REQUESTS)
def phase_start_time(logger: 'Logger', log_prefix: str, index: int, data: str) -> Optional[datetime]:
    """Extract the start time of a phase from the head of a log chunk.

    Only the first 250 characters of ``data`` are searched. The expected
    timestamp looks like::

        ... Sun Apr 25 16:59:09 2021
        ... Sat May  1 03:10:43 2021

    Args:
        logger: Object providing ``error`` and ``debug`` methods.
        log_prefix: Text prepended to every log message.
        index: Index reported in log messages.
        data: Text to search for the timestamp.

    Returns:
        The parsed (naive) datetime, or ``None`` when no timestamp is found
        or the matched text is not a valid date. Both failures are logged
        via ``logger.error``.
    """
    # Groups:         1       2      3        4     5     6     7     8
    expression = r'... (\w{3}) (\w{3})(\s{1,2})(\d+) (\d+):(\d+):(\d+) (\d+)'
    res = re.search(expression, data[:250])
    if not res:
        logger.error(f'{log_prefix} index {index} failed to extract date')
        return None

    # A successful match always carries all eight groups (none are optional),
    # so no group-count check is needed here.
    month = res.group(2)
    day = res.group(4)
    hour = res.group(5)
    minute = res.group(6)
    second = res.group(7)
    year = res.group(8)

    date_string: str = f'{year}-{month}-{day} {hour}:{minute}:{second}'
    date_format: str = '%Y-%b-%d %H:%M:%S'
    try:
        dt = datetime.strptime(date_string, date_format)
    except ValueError:
        # The text matched the pattern but is not a real date (e.g. unknown
        # month abbreviation or day 31 in a 30-day month). Report it the same
        # way as any other extraction failure instead of raising.
        logger.error(
            f'{log_prefix} index {index} failed to parse date {date_string!r}'
        )
        return None

    logger.debug(f'{log_prefix} index {index} start time {dt}')
    return dt
class CyBot:
    """Selenium-driven bot that logs in to Cyworld and downloads images.

    The bot drives one Chrome instance itself (login, navigation and the
    "feeder" that collects post URLs) and spawns parser and downloader
    processes in ``run``.
    """

    def __init__(self, chromedriver, wait=5, delay=3):
        """Start Chrome and prepare the bot.

        Args:
            chromedriver: Value passed to ``webdriver.Chrome``.
            wait: Seconds used for both the implicit wait and the
                explicit ``WebDriverWait``.
            delay: Seconds to sleep after the "more" button becomes
                clickable in ``feeder``.
        """
        self._logger = Logger('cybot.log')
        self._logger.info('크롬 드라이버 로딩 중..')

        driver = webdriver.Chrome(chromedriver)
        driver.implicitly_wait(wait)

        self._logger.info('크롬 드라이버 로딩 완료')

        self._chromedriver = chromedriver
        self._base_url = 'https://cy.cyworld.com'
        self._user_id = ''
        self._wait_time = wait
        self._delay = delay
        self._driver = driver
        self._wait = WebDriverWait(driver, wait)

    def init(self):
        """Open the Cyworld home page and return ``self`` for chaining."""
        self._logger.info('싸이월드 홈페이지 접속 중..')

        # Open the Cyworld page.
        self._driver.get('https://cyworld.com')

        self._logger.success('싸이월드 홈페이지 접속 완료')
        return self

    def login(self, user_email, user_password):
        """Submit the login form and verify that the timeline is reached.

        Returns ``self`` on success. On timeout or when the resulting URL
        does not contain ``timeline`` an error is logged and ``exit()`` is
        called.
        """
        self._logger.info('로그인 시도 중..')
        prev_url = self._driver.current_url

        self._driver.find_element_by_name('email').send_keys(user_email)
        self._driver.find_element_by_name('passwd').send_keys(
            user_password, Keys.RETURN)

        try:
            # Wait until either the URL changes or the dialog disappears.
            self._wait.until(EC_or(
                EC.url_changes(prev_url),
                EC.invisibility_of_element(
                    (By.CSS_SELECTOR,
                     '.ui-dialog.ui-widget.ui-widget-content.ui-corner-all.ui-front.ui-draggable.ui-resizable'))
            ))
        except Exception:
            # `except Exception` (not a bare except) so that Ctrl-C and
            # SystemExit are not reported as a timeout.
            self._logger.error('시간이 초과되었습니다')
            exit()

        url = self._driver.current_url
        if 'timeline' in url:
            self._logger.success('로그인 성공')
            return self

        self._logger.error('사용자 정보를 확인해주세요')
        exit()

    def home(self):
        """Navigate to the logged-in user's home and remember the user id.

        Returns ``self`` on success. On timeout or when the resulting URL
        does not contain ``home`` an error is logged and ``exit()`` is called.
        """
        self._logger.info('마이 홈으로 이동 중..')
        prev_url = self._driver.current_url

        # Extract the user's unique id from the last path segment of the
        # profile link.
        profile = self._driver.find_element_by_css_selector('a.freak1')
        self._user_id = profile.get_attribute('href').split('/').pop()

        # Click the profile picture area.
        self._driver.find_element_by_id('imggnbuser').click()

        try:
            self._wait.until(EC.url_changes(prev_url))
        except Exception:
            self._logger.error('시간이 초과되었습니다')
            exit()

        if 'home' not in self._driver.current_url:
            self._logger.error('마이 홈으로 이동할 수 없습니다')
            exit()

        self._logger.success('이동 완료')
        return self

    def feeder(self, content_list, running):
        """Collect post URLs from the timeline into ``content_list``.

        Repeatedly reads the content ids on the page, appends one layer URL
        per new id, and clicks the "more" button until it is no longer
        clickable. ``running.value`` is set to 0 and the browser window is
        closed when the loop ends.

        Args:
            content_list: List-like object receiving the post URLs.
            running: Object whose ``value`` attribute is set to 0 when the
                feeder is finished.
        """
        content_index = 0

        # Extract every timeline content area.
        while self._driver.find_element_by_css_selector('p.btn_list_more'):
            # Only look at the entries added since the previous pass.
            contents = self._driver.find_elements_by_css_selector(
                'input[name="contentID[]"]'
            )[content_index:]

            for content in contents:
                cid = content.get_attribute('value')
                content_url = '{}/home/{}/post/{}/layer'.format(
                    self._base_url, self._user_id, cid)

                self._logger.info('Feeder::', content_url)
                content_list.append(content_url)
                content_index += 1

            # Wait for the "more" button.
            try:
                next_button = self._wait.until(
                    EC.element_to_be_clickable(
                        (By.CSS_SELECTOR, 'p.btn_list_more'))
                )
                time.sleep(self._delay)
            except Exception:
                # The button never became clickable: treat this as the last
                # page. Previously the error was swallowed and the code went
                # on to use `next_button`, which is unbound on the first pass
                # and a stale element on later ones.
                running.value = 0
                break

            # Stop when the "more" button cannot be clicked (last page).
            if not (next_button.is_displayed() and next_button.is_enabled()):
                running.value = 0
                break

            # Click the "more" button.
            next_button.click()

        running.value = 0
        self._driver.close()
        self._logger.info('Feeder:: 종료')

    def run(self, parser=2, downloader=2):
        """Run the feeder together with parser and downloader processes.

        Args:
            parser: Number of parser processes to start.
            downloader: Number of downloader processes to start.
        """
        self._logger.info('이미지 다운로드 작업 시작')
        start = time.time()

        # Manager providing the objects shared between processes.
        with Manager() as manager:
            # Started processes.
            processes = []

            # Shared state.
            content_list = manager.list()
            image_list = manager.list()
            count = manager.Value('i', 0)
            lock = manager.Lock()
            feeder_running = manager.Value('i', 1)
            parser_running = manager.Value('i', 1)

            parser_logger = Logger('cybot_parser.log')
            downloader_logger = Logger('cybot_downloader.log')

            # Hand only cookie names and values to the parsers.
            cookie = [
                {'name': c['name'], 'value': c['value']}
                for c in self._driver.get_cookies()
            ]

            # Create and start the parser processes.
            for idx in range(parser):
                parser_instance = Parser(
                    self._chromedriver,
                    cookie,
                    parser_logger,
                    self._wait_time,
                    self._delay
                )
                parser_process = Process(
                    target=parser_instance.parse,
                    args=(
                        content_list,
                        image_list,
                        feeder_running,
                        parser_running
                    )
                )
                parser_process.name = 'Parser::' + str(idx)
                parser_process.start()
                processes.append(parser_process)
                self._logger.info('Parser', str(idx), '프로세스 시작')

            # Create and start the downloader processes.
            for idx in range(downloader):
                downloader_instance = Downloader(downloader_logger)
                downloader_process = Process(
                    target=downloader_instance.download,
                    args=(image_list, count, lock, parser_running))
                downloader_process.name = 'Downloader::' + str(idx)
                downloader_process.start()
                processes.append(downloader_process)
                self._logger.info('Downloader', str(idx), '프로세스 시작')

            # The feeder runs in this process.
            self._logger.info('Feeder 시작')
            self.feeder(content_list, feeder_running)

            # Wait for parser and downloader processes that are still running.
            for p in processes:
                p.join()

            # `count` is a manager proxy, so it is read before the manager
            # shuts down.
            self._logger.info('작업 소요시간: {}초'.format(
                round(time.time() - start, 2)))
            self._logger.info('전체 이미지 수: {}'.format(count.value))
from src.conf import Config
from src.logger import Logger
from sqlalchemy.ext.declarative import declarative_base
from geopy.geocoders import Nominatim
from sqlalchemy import exc
import pymysql

JSON_FILE_NAME = "src/mining_constants.json"
logger = Logger().logger
config = Config(JSON_FILE_NAME)

# mapper & MetaData: maps the subclass to the table and holds all the information about the database
Base = declarative_base()

# NOTE(review): this import deliberately stays below `Base`; presumably
# database.database imports `Base` from this module - confirm before moving.
from database.database import Database

# Create the database handle and the geocoder once at import time. Either
# failure below is fatal, so the process terminates with a non-zero status
# (a bare `exit()` reported success to the caller).
try:
    database = Database()
    geolocator = Nominatim(user_agent=f"{config.DB_NAME}", timeout=3)
except exc.NoSuchModuleError as err:
    # Raised by SQLAlchemy for the configured extension / DBAPI pair.
    print(
        err._message(),
        f"\tinput: sql extension= {config.SQL_EXTENSION}, python DBAPI= {config.PYTHON_DBAPI}"
    )
    raise SystemExit(1)
except pymysql.err.OperationalError as err:
    logger.error(config.CONNECTION_ERROR.format(err.args[1]))
    raise SystemExit(1)
class CyBot:
    """Selenium-driven bot that logs in to Cyworld and downloads images.

    Failures are reported through the ``onerror`` callback and completion
    through the ``done`` callback; both default to ``exit``.
    """

    __VERSION__ = '1.0.2'

    def __init__(self, chromedriver, wait=5, delay=3,
                 headless=False, onlog=None, onerror=exit, done=exit):
        """Store the configuration; the browser is started later by ``init``.

        Args:
            chromedriver: Value passed to ``webdriver.Chrome``.
            wait: Seconds used for the implicit and explicit waits.
            delay: Seconds to sleep after the "more" button becomes
                clickable in ``feeder``.
            headless: When true, headless-related Chrome arguments are added.
            onlog: Optional callable receiving progress messages; also
                passed to the Logger as ``callback``.
            onerror: Callable invoked with no arguments after a failure.
            done: Callable invoked with no arguments at the end of ``run``.
        """
        self._logger = Logger('cybot.log', callback=onlog)
        self._chromedriver = chromedriver
        self._base_url = 'https://cy.cyworld.com'
        self._user_id = ''
        self._wait_time = wait
        self._delay = delay
        self._headless = headless
        self._onlog = onlog
        self._onerror = onerror
        self._done = done
        self._options = None
        self._driver = None
        self._wait = None

    def init(self):
        """Start Chrome and open the Cyworld home page.

        Returns ``self`` on success. If the driver cannot be created, the
        error is logged, ``onerror`` is called and ``None`` is returned.
        """
        self._logger.info('크롬 드라이버 로딩 중..')

        try:
            options = webdriver.ChromeOptions()

            if self._headless:
                options.add_argument('headless')
                options.add_argument('window-size=800x600')
                options.add_argument("disable-gpu")

            options.add_argument('log-level=3')
            options.add_argument('--ignore-certificate-errors')
            options.add_argument('--ignore-ssl-errors')

            driver = webdriver.Chrome(self._chromedriver,
                                      chrome_options=options)
            driver.implicitly_wait(self._wait_time)
        except Exception as e:
            self._logger.error('크롬 드라이버 로딩 실패', detail=e)
            self._onerror()
            return

        self._options = options
        self._driver = driver
        self._wait = WebDriverWait(driver, self._wait_time)

        self._logger.info('크롬 드라이버 로딩 완료')

        # Open the Cyworld page.
        self._logger.info('싸이월드 홈페이지 접속 중..')
        self._driver.get('https://cyworld.com')
        self._logger.success('싸이월드 홈페이지 접속 완료')

        return self

    def login(self, user_email, user_password):
        """Submit the login form and verify that the timeline is reached.

        Returns ``self`` on success. On any failure the error is logged,
        ``onerror`` is called and ``None`` is returned.
        """
        self._logger.info('로그인 시도 중..')
        prev_url = self._driver.current_url

        try:
            self._driver.find_element_by_name('email').send_keys(user_email)

            # Set the password through a script argument. Formatting it into
            # the JavaScript source broke (or altered) the script whenever
            # the password contained a double quote or a backslash.
            self._driver.execute_script(
                'arguments[0].value = arguments[1]',
                self._driver.find_element_by_name('passwd'),
                user_password)
            self._driver.find_element_by_name('passwd').send_keys(Keys.RETURN)
        except Exception as e:
            self._logger.error('알 수 없는 오류가 발생했습니다', detail=e)
            self._onerror()
            return None

        time.sleep(3)

        try:
            # Joined without separators: one compound selector that requires
            # all of these classes on the same element.
            selectors = [
                '.ui-dialog', '.ui-widget', '.ui-widget-content',
                '.ui-corner-all', '.ui-front', '.ui-draggable',
                '.ui-resizable'
            ]

            # Wait until either the URL changes or the dialog disappears.
            self._wait.until(EC_or(
                EC.url_changes(prev_url),
                EC.invisibility_of_element(
                    (By.CSS_SELECTOR, ''.join(selectors)))
            ))
        except Exception as e:
            self._logger.error('시간이 초과되었습니다', detail=e)
            self._onerror()
            return None

        url = self._driver.current_url

        if 'timeline' in url:
            self._logger.success('로그인 성공')
            return self

        if 'pwd' in url.lower():
            # Clicking a.next would choose "change later", but because this
            # concerns the user's personal data the user is asked to handle
            # the password-change page manually.
            self._logger.error('싸이월드에 직접 로그인하여 비밀번호 변경 페이지를 확인한 후 다시 시도해주세요')
            self._onerror()
            return None

        self._logger.error('사용자 정보를 확인해주세요')
        self._onerror()
        return None

    def home(self):
        """Navigate to the logged-in user's home and remember the user id.

        Returns ``self`` on success. On timeout or when the resulting URL
        does not contain ``home`` the error is logged, ``onerror`` is called
        and ``None`` is returned.
        """
        self._logger.info('마이 홈으로 이동 중..')
        prev_url = self._driver.current_url

        # Extract the user's unique id from the last path segment of the
        # profile link.
        profile = self._driver.find_element_by_css_selector('a.freak1')
        self._user_id = profile.get_attribute('href').split('/').pop()

        # Click the profile picture area.
        self._driver.find_element_by_id('imggnbuser').click()

        try:
            self._wait.until(EC.url_changes(prev_url))
        except Exception as e:
            self._logger.error('시간이 초과되었습니다', detail=e)
            self._onerror()
            return None

        if 'home' not in self._driver.current_url:
            self._logger.error('마이 홈으로 이동할 수 없습니다')
            self._onerror()
            return None

        self._logger.success('이동 완료')
        return self

    def feeder(self, content_list, running):
        """Collect post URLs from the timeline into ``content_list``.

        Repeatedly reads the content ids on the page, appends one layer URL
        per new id, and clicks the "more" button until it is no longer
        clickable. ``running.value`` is set to 0 and the browser window is
        closed when the loop ends. Progress is reported through ``onlog``
        when it is set.

        Args:
            content_list: List-like object receiving the post URLs.
            running: Object whose ``value`` attribute is set to 0 when the
                feeder is finished.
        """
        content_index = 0

        # Extract every timeline content area.
        while self._driver.find_element_by_css_selector('p.btn_list_more'):
            # Only look at the entries added since the previous pass.
            contents = self._driver.find_elements_by_css_selector(
                'input[name="contentID[]"]'
            )[content_index:]

            for content in contents:
                cid = content.get_attribute('value')
                content_url = '{}/home/{}/post/{}/layer'.format(
                    self._base_url, self._user_id, cid)

                self._logger.info('Feeder::', content_url, callback=False)
                if self._onlog:
                    self._onlog(
                        '{}개의 게시물 다운로드 중..'.format(content_index + 1))

                content_list.append(content_url)
                content_index += 1

            # Wait for the "more" button.
            try:
                next_button = self._wait.until(
                    EC.element_to_be_clickable(
                        (By.CSS_SELECTOR, 'p.btn_list_more')))
                time.sleep(self._delay)
            except Exception:
                # The button never became clickable: treat this as the last
                # page. Previously the error was swallowed and the code went
                # on to use `next_button`, which is unbound on the first pass
                # and a stale element on later ones.
                running.value = 0
                break

            # Stop when the "more" button cannot be clicked (last page).
            if not (next_button.is_displayed() and next_button.is_enabled()):
                running.value = 0
                break

            # Click the "more" button.
            next_button.click()

        running.value = 0
        self._driver.close()
        self._logger.info('Feeder:: 종료', callback=False)

        if self._onlog:
            self._onlog(
                '총 {}개의 게시물이 확인되었습니다.\n다운로드가 완료될 때 까지 잠시만 기다려주세요'.format(
                    content_index))

    def run(self, parser=2, downloader=2):
        """Run the feeder together with parser and downloader processes.

        Calls the ``done`` callback once all processes have been joined.

        Args:
            parser: Number of parser processes to start.
            downloader: Number of downloader processes to start.
        """
        self._logger.info('이미지 다운로드 작업 시작')
        start = time.time()

        # Manager providing the objects shared between processes.
        with Manager() as manager:
            # Started processes.
            processes = []

            # Shared state.
            content_list = manager.list()
            image_list = manager.list()
            count = manager.Value('i', 0)
            lock = manager.Lock()
            feeder_running = manager.Value('i', 1)
            parser_running = manager.Value('i', 1)

            parser_logger = Logger('cybot_parser.log')
            downloader_logger = Logger('cybot_downloader.log')

            # Hand only cookie names and values to the parsers.
            cookie = [
                {'name': c['name'], 'value': c['value']}
                for c in self._driver.get_cookies()
            ]

            # Create and start the parser processes.
            for idx in range(parser):
                parser_instance = Parser(self._chromedriver, cookie,
                                         parser_logger, self._wait_time,
                                         self._delay, self._headless,
                                         self._options)
                parser_process = Process(target=parser_instance.parse,
                                         args=(content_list, image_list,
                                               feeder_running,
                                               parser_running),
                                         daemon=True)
                parser_process.name = 'Parser::' + str(idx)
                parser_process.start()
                processes.append(parser_process)
                self._logger.info('Parser', str(idx), '프로세스 시작')

            # Create and start the downloader processes.
            for idx in range(downloader):
                downloader_instance = Downloader(downloader_logger)
                downloader_process = Process(
                    target=downloader_instance.download,
                    args=(image_list, count, lock, parser_running),
                    daemon=True)
                downloader_process.name = 'Downloader::' + str(idx)
                downloader_process.start()
                processes.append(downloader_process)
                self._logger.info('Downloader', str(idx), '프로세스 시작')

            # The feeder runs in this process.
            self._logger.info('Feeder 시작')
            self.feeder(content_list, feeder_running)

            # Wait for parser and downloader processes that are still running.
            for p in processes:
                p.join()

            # `count` is a manager proxy, so it is read before the manager
            # shuts down.
            self._logger.info('작업 소요시간: {}초'.format(
                round(time.time() - start, 2)), callback=False)
            self._logger.info('전체 이미지 수: {}'.format(count.value),
                              callback=False)

        self._done()
class OrderData:
    """Import orders from a semicolon-separated export in ``./watch/``.

    Rows are grouped into orders, converted to ``Order.Order`` /
    ``Order.OrderLine`` objects, validated, and stored through the
    ``Database`` handle. Invalid orders are logged and counted.
    """

    # Number of rows before the header row of the export.
    SKIP_ROWS = 4

    # H:MM or HH:MM, 24-hour clock. The previous pattern contained a stray
    # "[" ("[0[0-9]"), which rejected "09:30" and accepted "[:30".
    DELIVERY_TIME_PATTERN = re.compile(r'^([01]?[0-9]|2[0-3]):[0-5][0-9]$')

    def __init__(self, fileName):
        """Remember the file name and create the database and logger."""
        self.fileName = fileName
        self.database = Database()
        self.logger = Logger()
        self.invalidOrders = 0

    def process(self):
        """Read the file, group its rows into orders and import them."""
        self.logger.info('Starting OrderData import')

        # newline='' lets the csv module handle line endings itself, as its
        # documentation requires for quoted fields containing newlines.
        with open(os.getcwd() + "/watch/" + self.fileName,
                  encoding='utf-8', newline='') as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=';')

            for _ in range(self.SKIP_ROWS):
                next(csv_reader)

            headers = next(csv_reader)
            orders = []
            rowCount = self.SKIP_ROWS
            currentOrder = []

            for order in csv_reader:
                rowCount += 1
                if len(order) > 0:
                    # A row with a total price starts a new order; rows
                    # without one are further lines of the current order.
                    if order[headers.index('Totaalprijs')] != '':
                        if currentOrder:
                            orders.append(currentOrder)
                        currentOrder = []
                    # The row number travels with the row as its last item.
                    currentOrder.append(order + [rowCount])

            # NOTE(review): the group still held in `currentOrder` when the
            # file ends is never appended to `orders`, so the last order is
            # not imported. Looks unintended unless the export ends with a
            # footer row - confirm before changing.

        start = time.time()
        self.logger.info("Processing {} orders".format(len(orders)))
        self.createOrderObj(headers, orders)
        self.logger.info('Import completed in: {}'.format(time.time() - start))
        self.logger.info('Invalid orders: {}'.format(self.invalidOrders))

    def createOrderObj(self, headers, orders):
        """Build an order object for every row group and hand it to createOrder.

        Args:
            headers: Header row; column positions are looked up by name.
            orders: List of row groups. Order-level fields are read from the
                first row of a group; every row becomes one order line.
        """
        for order in orders:
            restaurantName = order[0][headers.index('Winkelnaam')]
            restaurantId = Restaurant.getStoreByName(self.database,
                                                     restaurantName)

            customerName = order[0][headers.index('Klantnaam')]
            customerPhoneNr = order[0][headers.index('TelefoonNr')]
            customerEmail = order[0][headers.index('Email')]
            address = order[0][headers.index('Adres')]
            city = order[0][headers.index('Woonplaats')]

            addressId = Address.getOrCreateAddress(self.database, address,
                                                   city)
            customerId = Customer.createOrUpdateCustomer(
                self.database, customerEmail, customerName, customerPhoneNr,
                addressId)

            orderDate = parseDate(order[0][headers.index('Besteldatum')])

            # True = delivery, False = pick-up, None = anything else.
            deliveryTypeString = order[0][headers.index('AfleverType')]
            if deliveryTypeString == 'Bezorgen':
                deliveryType = True
            elif deliveryTypeString == 'Afhalen':
                deliveryType = False
            else:
                deliveryType = None

            deliveryDate = parseDate(order[0][headers.index('AfleverDatum')])
            deliveryTime = order[0][headers.index('AfleverMoment')]

            # Add the time of day only when the field holds a valid time.
            if self.DELIVERY_TIME_PATTERN.match(deliveryTime):
                hour, minute = deliveryTime.split(':')
                deliveryDate = deliveryDate.replace(hour=int(hour),
                                                    minute=int(minute))

            # NOTE(review): this and the other otherwise unused
            # priceToFloat() results below are kept because the calls may
            # reject malformed input - confirm before removing.
            totalPrice = priceToFloat(order[0][headers.index('Totaalprijs')])

            couponName = order[0][headers.index('Gebruikte Coupon')].strip()
            if len(couponName) > 0:
                couponId = Coupon.createCouponIfNotExists(
                    self.database, couponName)
            else:
                couponId = None

            couponDiscount = priceToFloat(
                order[0][headers.index('Coupon Korting')])
            paymentAmount = priceToFloat(order[0][headers.index('Te Betalen')])

            orderObj = Order.Order(restaurantId,
                                   customerId,
                                   deliveryType,
                                   couponDiscount,
                                   orderDate,
                                   deliveryDate,
                                   couponId,
                                   rowNumber=order[0][-1])

            for orderRow in order:
                productName = orderRow[headers.index('Product')].strip()

                crustName = orderRow[headers.index('PizzaBodem')]
                crustId = Product.getPizzaCrustByName(self.database, crustName)

                sauceName = orderRow[headers.index('PizzaSaus')]
                sauceId = Product.getSauceByName(self.database, sauceName)

                # A row without crust and sauce is a non-pizza product.
                if crustName == '' and sauceName == '':
                    otherProductId = Product.getOtherProductIdByName(
                        self.database, productName)
                    pizzaId = None
                else:
                    pizzaId = Product.getPizzaIdByName(self.database,
                                                       productName)
                    otherProductId = None

                price = priceToFloat(orderRow[headers.index('Prijs')])
                deliveryCosts = priceToFloat(
                    orderRow[headers.index('Bezorgkosten')])
                amount = int(orderRow[headers.index('Aantal')])

                # Comma-separated ingredient names; empty field = none.
                extraIngredientString = orderRow[headers.index(
                    'Extra Ingrediënten')]
                if len(extraIngredientString.strip()) > 0:
                    extraIngredients = [
                        Product.getIngredientByName(self.database,
                                                    ingredient.strip())
                        for ingredient in extraIngredientString.split(',')
                    ]
                else:
                    extraIngredients = []

                priceExtraIngredients = priceToFloat(
                    orderRow[headers.index('Prijs Extra Ingrediënten')])
                orderRowPrice = priceToFloat(
                    orderRow[headers.index('Regelprijs')])

                orderLine = Order.OrderLine(orderRowPrice,
                                            amount,
                                            extraIngredients,
                                            sauceId,
                                            crustId,
                                            pizzaId,
                                            otherProductId,
                                            rowNumber=orderRow[-1])
                orderObj.addOrderLine(orderLine)

            self.createOrder(orderObj)

    def createOrder(self, orderObj):
        """Store a valid order with its lines, or log why it is invalid.

        An invalid order is logged together with each of its invalid lines
        and increments ``invalidOrders`` once.
        """
        orderValid, orderErrors = orderObj.isValid()

        if orderValid:
            orderId = Order.createOrder(self.database, orderObj)
            for orderLine in orderObj.orderLines:
                Order.createOrderLine(self.database, orderLine, orderId)
        else:
            self.logger.error('Order not valid row: {}, error: {}'.format(
                orderObj.rowNumber, ', '.join(orderErrors)))

            for row in orderObj.orderLines:
                rowValid, rowErrors = row.isValid()
                if not rowValid:
                    self.logger.error(
                        'Order line not valid row: {}, error: {}'.format(
                            row.rowNumber, ', '.join(rowErrors)))

            self.invalidOrders += 1