Exemplo n.º 1
0
Arquivo: naver.py Projeto: Uzihoon/EOS
  def __init__(self, key, url, search):
    """Prepare the URL patterns, target URL, search URLs and Chrome driver.

    Args:
      key: Search keyword; interpolated as-is into the search URLs.
      url: URL of the post to look for. Normalised with ``format_kin_url``
        when ``search`` is "kin", otherwise with ``common.deleteHttp``.
      search: Search type. "kin" and "place" are special-cased here; the
        value is also handed to ``self.search_type`` to obtain the
        ``where`` query parameter.
    """
    ## regexp (raw strings so that \w and \. are not treated as str escapes)
    self.__blog_target_reg = re.compile(r'blog\.naver\.com')
    # NOTE(review): attribute name is misspelt ("palce"); kept unchanged
    # because other methods of the class may read it under this name.
    self.__palce_target_reg = re.compile(r'store\.naver\.com/restaurants/detail')
    self.__log_reg = re.compile(r'(?<=logNo=)\w+')
    self.__basic_reg = re.compile(r'^[^?]+')
    self.__dir_reg = re.compile(r'(?<=dirId=)\w+')
    self.__doc_reg = re.compile(r'(?<=docId=)\w+')
    self.__id_reg = re.compile(r"(?<=id=)\w+")

    self.__key = key
    self.__search_type = search
    self.__where = self.search_type(self.__search_type)
    # format_kin_url needs the patterns compiled above; it returns -1 when the
    # URL cannot be parsed, so self.__url may be -1 for a malformed "kin" URL.
    if self.__search_type == "kin":
      self.__url = self.format_kin_url(url)
    else:
      self.__url = common.deleteHttp(url)

    ## default url, used for every search type other than naver place
    self.__default_url = "https://search.naver.com/search.naver?where={0}&query={1}".format(self.__where, self.__key)
    self.__place_url = "https://store.naver.com/restaurants/list?query={0}".format(self.__key)
    if self.__search_type == "place":
      self.__driver_url = self.__place_url
    else:
      self.__driver_url = self.__default_url

    ## chrome driver
    self.driver = webdriver.Chrome("./chromedriver")
    self.driver.implicitly_wait(1)

    self.__post_rank = None
Exemplo n.º 2
0
Arquivo: naver.py Projeto: Uzihoon/EOS
 def format_blog_url(self, url):
   """Reduce a Naver blog URL to ``<part before '?'>/<logNo>``.

   The URL is first passed through ``common.deleteHttp``. If the result does
   not contain the blog host pattern it is returned as is. If it does, the
   text before the first ``?`` and the value of the ``logNo`` query
   parameter are joined with ``/``.

   Args:
     url: URL taken from a search result.

   Returns:
     The rewritten URL, or the output of ``common.deleteHttp`` unchanged when
     the URL is not a blog URL or carries no ``logNo`` parameter.
   """
   check_url = common.deleteHttp(url)
   if self.__blog_target_reg.search(check_url) is None:
     return check_url

   log_match = self.__log_reg.search(check_url)
   basic_match = self.__basic_reg.search(check_url)
   if log_match is None or basic_match is None:
     # Nothing to rewrite (e.g. no logNo= in the query string); returning the
     # stripped URL avoids an AttributeError from calling .group() on None.
     return check_url

   return "{0}/{1}".format(basic_match.group(), log_match.group())
Exemplo n.º 3
0
    def check_post_rank(self, li_list):
        """Return the 1-based position of the target URL within ``li_list``.

        Each item is searched for an ``<a>`` tag carrying the configured link
        class; its ``href`` is passed through ``common.deleteHttp`` and
        compared with the stored target URL.

        Args:
            li_list: Iterable of parsed list items exposing ``find``.

        Returns:
            The 1-based index of the first matching item, or ``False`` when
            no item matches.
        """
        for idx, val in enumerate(li_list, 1):
            a_tag = val.find('a', class_=self.__a_link_class)
            link = a_tag.get("href") if a_tag is not None else None
            if not link:
                # Item without a usable link (no anchor or no href): it still
                # counts towards the position but can never be the target.
                continue
            if common.deleteHttp(link) == self.__url:
                return idx

        return False
Exemplo n.º 4
0
Arquivo: naver.py Projeto: Uzihoon/EOS
 def format_kin_url(self, url):
   """Rebuild a "kin" URL as ``<part before '?'>?dirId=<dirId>&docId=<docId>``.

   The URL is first passed through ``common.deleteHttp``; the base part and
   the ``dirId`` / ``docId`` query values are then extracted with the
   pre-compiled patterns.

   Args:
     url: URL to normalise.

   Returns:
     The rebuilt URL string, or ``-1`` when an ``AttributeError`` is raised
     while extracting the parts (e.g. one of the patterns does not match).
   """
   try:
     stripped = common.deleteHttp(url)
     # .group() on a failed search raises AttributeError, handled below.
     base = self.__basic_reg.search(stripped).group()
     dir_id = self.__dir_reg.search(stripped).group()
     doc_id = self.__doc_reg.search(stripped).group()
   except AttributeError:
     return -1
   return "{0}?dirId={1}&docId={2}".format(base, dir_id, doc_id)
Exemplo n.º 5
0
Arquivo: naver.py Projeto: Uzihoon/EOS
  def find_target_post(self):
    """Walk up to ten result pages and return the rank of the target URL.

    For each page the list container is read through the driver, parsed with
    BeautifulSoup, and every list item's link is normalised according to the
    search type before being compared with the stored target URL.

    Returns:
      ``page_index * 10 + position`` (position is 1-based within the page)
      for the first matching item, or ``None`` when nothing matches within
      the ten pages.
    """
    ## base url the page parameter is appended to
    current = self.driver.current_url
    ## page parameter url
    start_str = self.make_start_str(self.__search_type)
    ## element selector
    a_link_class = self.make_a_link_class(self.__search_type)
    list_container = self.make_list_container(self.__search_type)
    list_item = "{0} > {1}".format(self.make_ul_select(self.__search_type), self.make_li_select(self.__search_type))

    ## pick the url normaliser once instead of branching for every item
    formatters = {
      'blog': self.format_blog_url,
      'kin': self.format_kin_url,
      'place': self.format_place_url,
    }
    format_url = formatters.get(self.__search_type, common.deleteHttp)

    ## crawl naver
    for x in range(10):
      # 'place' pages are numbered 1, 2, 3, ...; the others use 1, 11, 21, ...
      pages = (x + 1) if self.__search_type == 'place' else (x * 10) + 1
      self.driver.get(current + start_str + str(pages))

      ## fetch the naver list container element
      # Generic find_element form: accepted by Selenium 3 and 4, whereas
      # find_element_by_css_selector was removed in Selenium 4.3.
      container = self.driver.find_element("css selector", list_container)
      html = container.get_attribute('innerHTML')

      ## HTML Parsing
      soup = BeautifulSoup(html, 'html.parser')

      for idx, val in enumerate(soup.select(list_item), 1):
        a_tag = val.find('a', class_=a_link_class)
        link = a_tag.get('href') if a_tag is not None else None
        if not link:
          # No anchor or href: skip instead of feeding an empty string to the
          # formatters (which can raise, or yield -1 and falsely match).
          continue

        if format_url(link) == self.__url:
          return x * 10 + idx

    return None
Exemplo n.º 6
0
    def __init__(self, key, url, sort_type):
        """Store the search parameters, list selectors and a Chrome driver.

        Args:
            key: Search keyword; interpolated into the Kakao map search URL.
            url: Target URL; stored after ``common.deleteHttp``.
            sort_type: Stored as given; not otherwise used in this method.
        """
        ## search parameters
        self.__key = key
        self.__sort_type = sort_type
        self.__url = common.deleteHttp(url)

        ## kakao map place search url
        url_template = "https://map.kakao.com/?from=total&q={0}&tab=place&nil_suggest=btn"
        self.__driver_url = url_template.format(self.__key)

        ## css selectors / class name used to read the result list
        self.__list_container = ".placelist"
        self.__li_select = ".PlaceItem"
        self.__a_link_class = "moreview"

        ## rank of the target post, unknown until a search has run
        self.__post_rank = None

        ## chrome driver
        chrome = webdriver.Chrome("./chromedriver")
        chrome.implicitly_wait(1)
        self.driver = chrome

        self.num_of_page = 2
Exemplo n.º 7
0
Arquivo: naver.py Projeto: Uzihoon/EOS
 def format_place_url(self, url):
   """Reduce a place URL to ``<part before '?'>?id=<id>``.

   The URL is first passed through ``common.deleteHttp``; the text before
   the first ``?`` and the value following ``id=`` are then recombined.

   Args:
     url: URL taken from a place search result.

   Returns:
     The rebuilt URL, or the output of ``common.deleteHttp`` unchanged when
     the base part or the ``id`` value cannot be found.
   """
   check_url = common.deleteHttp(url)
   basic_match = self.__basic_reg.search(check_url)
   id_match = self.__id_reg.search(check_url)
   if basic_match is None or id_match is None:
     # Calling .group() on a failed search would raise AttributeError and
     # abort the caller's whole result scan; hand back the stripped URL.
     return check_url
   return "{0}?id={1}".format(basic_match.group(), id_match.group())