def __init__(self, key, url, search):
    """Set up the regexes, target URL, search URLs and Chrome driver.

    Args:
        key: Search keyword interpolated into the Naver query URLs.
        url: URL of the post whose rank is being looked up.
        search: Search type; the values "kin" and "place" get special
            handling here, anything else uses the default search URL.
    """
    ## regexp
    # Raw strings keep ``\w`` a regex token instead of an invalid string
    # escape (a SyntaxWarning on Python 3.12+); the dots in the host
    # names are escaped so they only match a literal '.'.
    self.__blog_target_reg = re.compile(r'blog\.naver\.com')
    # NOTE: attribute name is spelled "palce" (sic); kept as-is because
    # code outside this block may reference it.
    self.__palce_target_reg = re.compile(r'store\.naver\.com/restaurants/detail')
    self.__log_reg = re.compile(r'(?<=logNo=)\w+')
    self.__basic_reg = re.compile(r'^[^?]+')
    self.__dir_reg = re.compile(r'(?<=dirId=)\w+')
    self.__doc_reg = re.compile(r'(?<=docId=)\w+')
    self.__id_reg = re.compile(r"(?<=id=)\w+")

    self.__key = key
    self.__search_type = search
    self.__where = self.search_type(self.__search_type)
    # "kin" targets are normalized with format_kin_url (which yields -1
    # for a URL it cannot parse); every other type only goes through
    # common.deleteHttp.
    self.__url = self.format_kin_url(url) if self.__search_type == "kin" else common.deleteHttp(url)

    ## apply the default search url for everything except naver place
    self.__default_url = "https://search.naver.com/search.naver?where={0}&query={1}".format(self.__where, self.__key)
    self.__place_url = "https://store.naver.com/restaurants/list?query={0}".format(self.__key)
    self.__driver_url = self.__place_url if self.__search_type == "place" else self.__default_url

    ## chrome driver
    self.driver = webdriver.Chrome("./chromedriver")
    self.driver.implicitly_wait(1)
    self.__post_rank = None
def format_blog_url(self, url):
    """Normalize a Naver blog URL to the ``<base>/<logNo>`` form.

    The URL is first passed through ``common.deleteHttp``. If it matches
    the blog host pattern and carries a ``logNo=`` query parameter, the
    result is the part before the first ``?`` joined with the ``logNo``
    value by a ``/``.

    Args:
        url: The link to normalize.

    Returns:
        The normalized blog URL, or the ``common.deleteHttp`` result
        unchanged when the link is not a blog URL or has no ``logNo=``
        parameter (previously the latter raised AttributeError).
    """
    check_url = common.deleteHttp(url)
    if self.__blog_target_reg.search(check_url) is None:
        return check_url

    log_match = self.__log_reg.search(check_url)
    basic_match = self.__basic_reg.search(check_url)
    # A blog link without a logNo query parameter (or with nothing before
    # the '?') cannot be rewritten; hand it back as-is instead of
    # calling .group() on None.
    if log_match is None or basic_match is None:
        return check_url

    return "{0}/{1}".format(basic_match.group(), log_match.group())
def check_post_rank(self, li_list):
    """Find the 1-based position of the target URL in a list of items.

    Args:
        li_list: Iterable of parsed list items; each is searched for an
            ``<a>`` tag whose class is ``self.__a_link_class``.

    Returns:
        The 1-based index of the first item whose link (after
        ``common.deleteHttp``) equals the target URL, or ``False`` when
        no item matches.
    """
    for idx, val in enumerate(li_list, 1):
        anchor = val.find('a', class_=self.__a_link_class)
        # Items without the expected anchor (or without an href) cannot
        # match; skip them rather than crash on None. They still occupy
        # a position, so idx keeps counting.
        if anchor is None:
            continue
        link = anchor.get("href")
        if link is None:
            continue
        if common.deleteHttp(link) == self.__url:
            return idx
    return False
def format_kin_url(self, url):
    """Normalize a URL to ``<base>?dirId=<dirId>&docId=<docId>``.

    ``<base>`` is the part of the ``common.deleteHttp`` result before the
    first ``?``; ``dirId`` and ``docId`` are taken from the query string.

    Args:
        url: The link to normalize.

    Returns:
        The normalized URL string, or ``-1`` when any step raises
        AttributeError (e.g. one of the three regexes finds no match).
        Callers compare the result with ``==``, so the ``-1`` sentinel is
        kept for backward compatibility.
    """
    try:
        check_url = common.deleteHttp(url)
        # .group() on a failed search raises AttributeError, which is the
        # "not a kin URL" signal; a successful .group() is never None, so
        # no further None check is needed.
        base_url = self.__basic_reg.search(check_url).group()
        dir_id = self.__dir_reg.search(check_url).group()
        doc_id = self.__doc_reg.search(check_url).group()
    except AttributeError:
        return -1
    return "{0}?dirId={1}&docId={2}".format(base_url, dir_id, doc_id)
def find_target_post(self):
    """Walk up to ten result pages and return the rank of the target URL.

    Starting from the driver's current URL, each page is loaded by
    appending the page parameter, the list container's inner HTML is
    parsed with BeautifulSoup, and every list item's link is normalized
    according to the search type and compared with the target URL.

    Returns:
        ``page_index * 10 + position`` (position is 1-based within the
        page) for the first matching item, or ``None`` when nothing
        matches on any of the ten pages.
    """
    search_type = self.__search_type

    ## url
    current = self.driver.current_url

    ## page parameter url
    start_str = self.make_start_str(search_type)

    ## element selector
    a_link_class = self.make_a_link_class(search_type)
    list_container = self.make_list_container(search_type)
    list_item = "{0} > {1}".format(self.make_ul_select(search_type), self.make_li_select(search_type))

    # The normalizer depends only on the search type, so pick it once
    # instead of re-deciding for every list item.
    if search_type == 'blog':
        normalize = self.format_blog_url
    elif search_type == 'kin':
        normalize = self.format_kin_url
    elif search_type == 'place':
        normalize = self.format_place_url
    else:
        normalize = common.deleteHttp

    ## crawl naver
    for x in range(0, 10):
        # "place" pages are numbered 1, 2, 3, ...; the other search types
        # use the index of the first result on the page (1, 11, 21, ...).
        pages = (x + 1) if search_type == 'place' else (x * 10) + 1
        self.driver.get(current + start_str + repr(pages))

        ## fetch the naver list container element
        html = self.driver.find_element_by_css_selector(list_container).get_attribute('innerHTML')

        ## HTML Parsing
        soup = BeautifulSoup(html, 'html.parser')

        for idx, val in enumerate(soup.select(list_item), 1):
            a_tag = val.find('a', class_=a_link_class)
            link = a_tag.get('href') if a_tag is not None else None
            # An item without a usable link can never be the target;
            # skipping it avoids feeding an empty string to the
            # normalizers. The item still counts toward the rank.
            if not link:
                continue

            check_url = normalize(link)
            # format_kin_url reports an unparsable link as -1; never let
            # that sentinel compare equal to an unparsable target URL.
            if check_url == -1:
                continue

            if check_url == self.__url:
                return x * 10 + idx

    return None
def __init__(self, key, url, sort_type):
    """Store the search parameters, CSS selectors and Chrome driver.

    Args:
        key: Search keyword interpolated into the Kakao map query URL.
        url: URL of the place to look for; stored after passing through
            ``common.deleteHttp``.
        sort_type: Stored as-is for later use.
    """
    ## rank
    self.__post_rank = None

    ## element css selector
    self.__a_link_class = "moreview"
    self.__li_select = ".PlaceItem"
    self.__list_container = ".placelist"

    ## search parameters
    self.__sort_type = sort_type
    self.__key = key
    self.__url = common.deleteHttp(url)

    ## kakao map search url for the keyword
    url_template = "https://map.kakao.com/?from=total&q={0}&tab=place&nil_suggest=btn"
    self.__driver_url = url_template.format(self.__key)

    ## chrome driver
    chrome = webdriver.Chrome("./chromedriver")
    self.driver = chrome
    chrome.implicitly_wait(1)

    self.num_of_page = 2
def format_place_url(self, url):
    """Normalize a place URL to the ``<base>?id=<id>`` form.

    ``<base>`` is the part of the ``common.deleteHttp`` result before the
    first ``?``; ``<id>`` is the word following ``id=``.

    Args:
        url: The link to normalize.

    Returns:
        The normalized URL, or the ``common.deleteHttp`` result unchanged
        when the link has no base part or no ``id=`` parameter
        (previously this raised AttributeError).
    """
    check_url = common.deleteHttp(url)
    basic_match = self.__basic_reg.search(check_url)
    id_match = self.__id_reg.search(check_url)
    # Without both pieces there is nothing to rebuild; return the
    # stripped link instead of calling .group() on None.
    if basic_match is None or id_match is None:
        return check_url
    return "{0}?id={1}".format(basic_match.group(), id_match.group())