예제 #1
0
 def __init__(self, url):
     """Create the page object for *url* and run ``get_info`` on it.

     The wrapper returned by ``bs4(url)`` is kept on ``self.bs4`` only
     while ``get_info`` runs and is reset to ``None`` afterwards.

     Args:
         url: Address of the page; also used to derive ``self.id``.
     """
     super(newpct_page, self).__init__()
     self.url = url
     # Presumably consumed by get_info() below -- verify against get_info.
     self.bs4 = bs4(url)
     # Identifier is the built-in hash of the URL.
     self.id = hash(url)
     self.get_info()
     # Drop the reference to the parsed page once get_info() has finished.
     self.bs4 = None
예제 #2
0
 def get_links_pagination(self, url):
     """Return the hrefs found in the first ``ul.buscar-list`` of *url*.

     Args:
         url: Address of the page to load through ``bs4``.

     Returns:
         A list with the ``href`` of every ``<a>`` carrying that attribute
         inside the first ``<ul class="buscar-list">``, each encoded as
         UTF-8, in document order.

     Raises:
         IndexError: If the page contains no ``<ul class="buscar-list">``.
     """
     page = bs4(url)
     result_lists = page.soup.find_all("ul", class_="buscar-list")
     # Only the first matching list is inspected; indexing an empty result
     # raises IndexError, exactly as before.
     first_list = result_lists[0]
     # ``find_all`` is used throughout instead of mixing it with the legacy
     # ``findAll`` alias.
     return [anchor['href'].encode('utf-8')
             for anchor in first_list.find_all('a', href=True)]
예제 #3
0
 def __init__(self, url):
     """Create the page object for *url* and run ``get_info`` on it.

     ``self.id`` is the first run of decimal digits found in *url*, kept
     as a string.

     Args:
         url: Address of the page; must contain at least one digit.

     Raises:
         IndexError: If *url* contains no digits.
     """
     self.url = url
     self.bs4 = bs4(url)
     # Raw string avoids the invalid "\d" escape warning; the local name no
     # longer shadows the built-in ``id``.
     digit_runs = re.findall(r'\d+', url)
     self.id = digit_runs[0]
     self.get_info()
     # Drop the reference to the parsed page once get_info() has finished.
     self.bs4 = None
     # Base initialiser still runs last: the original ordering is preserved
     # because it may touch the attributes set above -- TODO confirm.
     super(filmaffinity_page, self).__init__()
예제 #4
0
def get_links_vo(url):
    """Collect the links on *url* whose href matches ``tag_vo``.

    Hrefs that also match ``pagination_vo`` are never returned themselves.
    When *url* does not match ``pagination_vo``, each such href is followed
    and the links gathered there are added to the result; when *url* does
    match it, those hrefs are ignored.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of href values, in the order they were encountered.
    """
    collected = []
    page = bs4(url)
    for anchor in page.get_all_links():
        href = anchor['href']
        if not any(re.findall(tag_vo, href)):
            continue
        if not any(re.findall(pagination_vo, href)):
            # Plain matching link: keep it.
            collected.append(href)
        elif not any(re.findall(pagination_vo, url)):
            # Pagination link seen from a non-pagination page: follow it.
            collected.extend(get_links_vo(href))
    return collected
예제 #5
0
    def __init__(self, url):
        """Create the object for *url*, load its JSON state, run ``get_info``.

        The state file is ``state/newpct_series/<id>.json`` under the parent
        of the directory containing this module, where ``<id>`` is the
        built-in hash of *url*.

        Args:
            url: Address of the page; also used to derive ``self.id``.
        """
        self.url = url
        self.bs4 = bs4(url)
        # NOTE(review): if this runs on Python 3, str hashes are randomised
        # per process unless PYTHONHASHSEED is fixed, so this file name may
        # not be stable between runs -- confirm the target interpreter.
        self.id = hash(url)

        module_dir = os.path.dirname(__file__)
        parent_dir = os.path.abspath(os.path.join(module_dir, os.pardir))
        state_file = parent_dir + '/state/newpct_series/%s.json' % self.id
        self.json = self.readJSONfile(state_file)

        self.get_info()
        # Drop the reference to the parsed page once get_info() has finished.
        self.bs4 = None
예제 #6
0
def get_links_hd(url):
    """Collect the links on *url* whose href matches ``tag_hd``.

    Hrefs that also match ``pagination_hd`` are never returned themselves.
    When *url* does not match ``pagination_hd``, each such href is followed
    and the links gathered there are added to the result; when *url* does
    match it, those hrefs are ignored.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of href values, in the order they were encountered.
    """
    collected = []
    page = bs4(url)
    for anchor in page.get_all_links():
        href = anchor['href']
        if not any(re.findall(tag_hd, href)):
            continue
        if not any(re.findall(pagination_hd, href)):
            # Plain matching link: keep it.
            collected.append(href)
        elif not any(re.findall(pagination_hd, url)):
            # Pagination link seen from a non-pagination page: follow it.
            collected.extend(get_links_hd(href))
    return collected