Example #1
    def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb):
        try:
            query = urlparse.urljoin(self.base_link,
                                     self.tvsearch_link % urllib.quote_plus(title.replace('\'', '').rsplit(':', 1)[0]))

            html = proxy.get(query, 'item')
            if 'page=2' in html or 'page%3D2' in html:
                html2 = proxy.get(query + '&page=2', 'item')
                html += html2

            html = BeautifulSoup(html)

            cleaned_title = 'watchputlocker' + clean_title(title)
            years = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1)]

            items = html.findAll('div', attrs={'class': 'item'})

            show_url = None
            for item in items:
                links = item.findAll('a')
                for link in links:
                    href = link['href']
                    link_title = link['title']
                    try:
                        href = urlparse.parse_qs(urlparse.urlparse(href).query)['u'][0]
                    except:
                        pass
                    try:
                        href = urlparse.parse_qs(urlparse.urlparse(href).query)['q'][0]
                    except:
                        pass
                    if cleaned_title == clean_title(link_title) and show_year in link_title:
                        url = re.findall('(?://.+?|)(/.+)', href)[0]
                        show_url = urlparse.urljoin(self.base_link, replaceHTMLCodes(url))
                    else:
                        continue

                    html = BeautifulSoup(proxy.get(show_url, 'tv_episode_item'))
                    season_items = html.findAll('div', attrs={'class': 'show_season'})
                    for season_item in season_items:
                        if season_item["data-id"] != season:
                            continue
                        episode_items = season_item.findAll('div', attrs={'class': 'tv_episode_item'})
                        for episode_item in episode_items:
                            link = episode_item.findAll('a')[-1]
                            href = link["href"]
                            link_episode = link.contents[0].strip()
                            if link_episode != "E%s" % (episode):
                                continue
                            link_airdate = link.findAll('span', attrs={'class': 'tv_num_versions'})[-1]  # the last span holds the air date
                            link_airdate = link_airdate.contents[0]
                            if any(candidate_year in link_airdate for candidate_year in years):
                                return self.sources(href)

        except:
            pass
        return []
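All of these scrapers repeat the same inline dance to unwrap the proxy's redirect links: if the href carries a ?u= or ?q= query parameter, that value replaces the href. A minimal, self-contained sketch of the pattern, using the hypothetical helper name unwrap_proxy_href (not part of the original code):

    import urlparse

    def unwrap_proxy_href(href):
        # mirror the scrapers' behaviour: try ?u= first, then ?q= on the result
        for param in ('u', 'q'):
            try:
                href = urlparse.parse_qs(urlparse.urlparse(href).query)[param][0]
            except KeyError:
                pass
        return href

    # parse_qs percent-decodes values:
    # unwrap_proxy_href('/out?u=%2Fwatch%2F123') -> '/watch/123'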
Example #2
    def get_html(self, title, search_link):
        key = self.get_key()
        query = search_link % (urllib.quote_plus(title.replace('\'', '').rsplit(':', 1)[0]), key)
        query = urlparse.urljoin(self.base_link, query)

        html = proxy.get(query, 'index_item')
        if 'index_item' in html:  # otherwise the method falls through and returns None
            if 'page=2' in html or 'page%3D2' in html:
                html2 = proxy.get(query + '&page=2', 'index_item')
                html += html2
            return html
Example #4
    def get_html(self, title, search_link):
        key = self.get_key()
        query = search_link % (urllib.quote_plus(
            title.replace('\'', '').rsplit(':', 1)[0]), key)
        query = urlparse.urljoin(self.base_link, query)

        html = proxy.get(query, 'index_item')
        if 'index_item' in html:
            if 'page=2' in html or 'page%3D2' in html:
                html2 = proxy.get(query + '&page=2', 'index_item')
                html += html2
            return html
Example #5
    def get_html(self, title, search_link):
        key = self.get_key()
        query = search_link % (
            urllib.quote_plus(" ".join(title.translate(None, '\'"?:!@#$&-,').split()).rsplit(':', 1)[0]), key)
        query = urlparse.urljoin(self.base_link, query)

        html = proxy.get(query, 'index_item')
        if 'index_item' in html:
            if 'page=2' in html or 'page%3D2' in html:
                html2 = proxy.get(query + '&page=2', 'index_item')
                html += html2
            return html
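A quick illustration of the sanitiser in this variant (Python 2 only: str.translate(None, deletechars) deletes the listed characters, and the join/split pair collapses runs of whitespace). Note that ':' is in the delete set, so the later rsplit(':', 1)[0] is effectively a no-op on the sanitised string, and that unicode titles must be encoded to byte strings first, as Example 10 does:

    title = "Marvel's Agents of S.H.I.E.L.D."
    cleaned = " ".join(title.translate(None, '\'"?:!@#$&-,').split())
    print cleaned  # Marvels Agents of S.H.I.E.L.D.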
Example #6
    def scrape_movie(self, title, year, imdb):
        try:
            query = self.moviesearch_link % urllib.quote_plus(
                title.replace('\'', '').rsplit(':', 1)[0])
            query = urlparse.urljoin(self.base_link, query)

            html = proxy.get(query, 'item')
            if 'page=2' in html or 'page%3D2' in html:
                html2 = proxy.get(query + '&page=2', 'item')
                html += html2

            html = BeautifulSoup(html)

            cleaned_title = 'watchputlocker' + clean_title(title)
            years = [
                '(%s)' % str(year),
                '(%s)' % str(int(year) + 1),
                '(%s)' % str(int(year) - 1)
            ]

            items = html.findAll('div', attrs={'class': 'item'})

            for item in items:
                links = item.findAll('a')
                for link in links:
                    href = link['href']
                    link_title = link['title']
                    if any(candidate_year in link_title
                           for candidate_year in years):
                        try:
                            href = urlparse.parse_qs(
                                urlparse.urlparse(href).query)['u'][0]
                        except:
                            pass
                        try:
                            href = urlparse.parse_qs(
                                urlparse.urlparse(href).query)['q'][0]
                        except:
                            pass

                        if cleaned_title == clean_title(link_title):
                            url = re.findall('(?://.+?|)(/.+)', href)[0]
                            url = replaceHTMLCodes(url)
                            return self.sources(url)
        except:
            pass
        return []
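The ±1-year window in scrape_movie guards against off-by-one release years between the site's listing and the caller's metadata; a quick illustration:

    year = '2014'
    years = ['(%s)' % year, '(%s)' % (int(year) + 1), '(%s)' % (int(year) - 1)]
    link_title = 'Watch Some Movie (2015)'
    print any(candidate_year in link_title for candidate_year in years)  # True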
Example #7
    def sources(self, url):
        sources = []
        try:
            if url == None: return sources

            url = urlparse.urljoin(self.base_link, url)
            html = proxy.get(url, 'choose_tabs')
            parsed_html = BeautifulSoup(html)

            table_bodies = parsed_html.findAll('tbody')
            count = 0
            for table_body in table_bodies:
                try:
                    link = table_body.findAll('a')[0]["href"]
                    try:
                        # replace link with the ?u= parameter if present
                        link = urlparse.parse_qs(urlparse.urlparse(link).query)['u'][0]
                    except:
                        pass
                    try:
                        # replace link with the ?q= parameter if present
                        link = urlparse.parse_qs(urlparse.urlparse(link).query)['q'][0]
                    except:
                        pass

                    # the real target is base64-encoded in the ?url= parameter
                    link = urlparse.parse_qs(urlparse.urlparse(link).query)['url'][0]
                    link = base64.b64decode(link)

                    if link.startswith("//"):
                        link = "http:" + link
                    link = replaceHTMLCodes(link)
                    link = link.encode('utf-8')

                    host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(link.strip().lower()).netloc)[0]
                    host = replaceHTMLCodes(host)
                    host = host.encode('utf-8')

                    quality = table_body.findAll('span')[0]["class"]
                    if quality == 'quality_cam' or quality == 'quality_ts':
                        quality = 'CAM'
                    elif quality == 'quality_dvd':
                        quality = 'SD'
                        
                    if not filter_host(host):
                        continue
                    count += 1
                    sources.append(
                        {'source': host, 'quality': quality, 'scraper': 'Primewire', 'url': link, 'direct': False})

                except:
                    pass
            if dev_log == 'true':
                end_time = time.time() - self.start_time
                send_log(self.name, end_time, count)
            return sources
        except:
            return sources
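The link-decoding chain in sources() can be hard to follow inline: the site's go-link carries the real target base64-encoded in its ?url= parameter, and protocol-relative results are forced to http. A self-contained sketch with a synthetic href:

    import base64
    import urlparse

    href = '/goto?url=' + base64.b64encode('//example.com/embed/abc123')
    link = urlparse.parse_qs(urlparse.urlparse(href).query)['url'][0]
    link = base64.b64decode(link)   # '//example.com/embed/abc123'
    if link.startswith('//'):       # protocol-relative -> force http
        link = 'http:' + link
    print link                      # http://example.com/embed/abc123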
Example #8
    def sources(self, url):
        sources = []
        try:
            if url == None: return sources
            absolute_url = urlparse.urljoin(self.base_link, url)
            html = BeautifulSoup(proxy.get(absolute_url, 'link_ite'))
            tables = html.findAll('table',
                                  attrs={'class': re.compile('link_ite.+?')})
            for table in tables:
                rows = table.findAll('tr')
                for row in rows:
                    link = row.findAll('a')[-1]
                    href = link['href']

                    if 'gtfo' not in href:
                        continue

                    try:
                        href = urlparse.parse_qs(
                            urlparse.urlparse(href).query)['u'][0]
                    except:
                        pass
                    try:
                        href = urlparse.parse_qs(
                            urlparse.urlparse(href).query)['q'][0]
                    except:
                        pass

                    href = base64.b64decode(
                        urlparse.parse_qs(
                            urlparse.urlparse(href).query)['gtfo'][0])
                    href = replaceHTMLCodes(href)

                    host = re.findall(
                        '([\w]+[.][\w]+)$',
                        urlparse.urlparse(href.strip().lower()).netloc)[0]
                    host = replaceHTMLCodes(host)
                    host = host.encode('utf-8')

                    quality = row.findAll('div', attrs={'class':
                                                        'quality'})[0].text
                    if 'CAM' in quality or 'TS' in quality:
                        quality = 'CAM'
                    elif 'HD' not in quality:
                        # keep the scraped HD label; everything else is SD
                        quality = 'SD'

                    sources.append({
                        'source': host,
                        'quality': quality,
                        'scraper': self.name,
                        'url': href,
                        'direct': False
                    })
        except:
            pass

        return sources
Example #10
    def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid = False):
        try:
            html = BeautifulSoup(self.get_html(title, self.tvsearch_link))
            index_items = html.findAll('div', attrs={'class': re.compile('index_item.+?')})
            title = 'watch' + clean_title(" ".join(title.translate(None, '\'"?:!@#$&-,').split()))

            for index_item in index_items:
                try:
                    links = index_item.findAll('a')
                    for link in links:
                        href = link['href']
                        link_title = link['title']
                        try:
                            href = urlparse.parse_qs(urlparse.urlparse(href).query)['u'][0]
                        except:
                            pass
                        try:
                            href = urlparse.parse_qs(urlparse.urlparse(href).query)['q'][0]
                        except:
                            pass
                        clean_link_title = clean_title(" ".join(link_title.encode().translate(None, '\'"?:!@#$&-,').split()))
                        if title == clean_link_title:  # href is the show page relative url
                            show_url = urlparse.urljoin(self.base_link, href)
                            html = BeautifulSoup(proxy.get(show_url, 'tv_episode_item'))

                            seasons = html.findAll('div', attrs={'class': 'show_season'})
                            for scraped_season in seasons:
                                if scraped_season['data-id'] == season:
                                    tv_episode_items = scraped_season.findAll('div', attrs={'class': 'tv_episode_item'})
                                    for tv_episode_item in tv_episode_items:
                                        links = tv_episode_item.findAll('a')
                                        for link in links:
                                            if link.contents[0].strip() == "E%s" % episode:
                                                episode_href = link['href']
                                                try:
                                                    episode_href = \
                                                        urlparse.parse_qs(urlparse.urlparse(episode_href).query)['u'][0]
                                                except:
                                                    pass
                                                try:
                                                    episode_href = \
                                                        urlparse.parse_qs(urlparse.urlparse(episode_href).query)['q'][0]
                                                except:
                                                    pass
                                                return self.sources(episode_href)
                except:
                    continue
            return []
        except Exception, argument:
            if dev_log == 'true':
                error_log(self.name, 'Check Search')
            return []
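The season/episode matching above keys off two things in the show page markup: the season container's data-id attribute and the literal "E<num>" text of the episode link. A minimal sketch against a synthetic fragment (the markup mirrors the classes the scraper expects; the href is made up):

    from BeautifulSoup import BeautifulSoup  # BeautifulSoup 3; bs4's findAll alias also works

    html = BeautifulSoup('<div class="show_season" data-id="2">'
                         '<div class="tv_episode_item"><a href="/s02e05">E5</a></div></div>')

    season, episode = '2', '5'
    for scraped_season in html.findAll('div', attrs={'class': 'show_season'}):
        if scraped_season['data-id'] == season:
            for item in scraped_season.findAll('div', attrs={'class': 'tv_episode_item'}):
                for link in item.findAll('a'):
                    if link.contents[0].strip() == "E%s" % episode:
                        print link['href']  # /s02e05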
Example #12
    def scrape_movie(self, title, year, imdb):
        try:
            query = self.moviesearch_link % urllib.quote_plus(title.replace('\'', '').rsplit(':', 1)[0])
            query = urlparse.urljoin(self.base_link, query)

            html = proxy.get(query, 'item')
            if 'page=2' in html or 'page%3D2' in html:
                html2 = proxy.get(query + '&page=2', 'item')
                html += html2

            html = BeautifulSoup(html)

            cleaned_title = 'watchputlocker' + clean_title(title)
            years = ['(%s)' % str(year), '(%s)' % str(int(year) + 1), '(%s)' % str(int(year) - 1)]

            items = html.findAll('div', attrs={'class': 'item'})

            for item in items:
                links = item.findAll('a')
                for link in links:
                    href = link['href']
                    link_title = link['title']
                    if any(candidate_year in link_title for candidate_year in years):
                        try:
                            href = urlparse.parse_qs(urlparse.urlparse(href).query)['u'][0]
                        except:
                            pass
                        try:
                            href = urlparse.parse_qs(urlparse.urlparse(href).query)['q'][0]
                        except:
                            pass

                        if cleaned_title == clean_title(link_title):
                            url = re.findall('(?://.+?|)(/.+)', href)[0]
                            url = replaceHTMLCodes(url)
                            return self.sources(url)
        except:
            pass
        return []
Example #13
    def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb):
        try:
            html = BeautifulSoup(self.get_html(title, self.tvsearch_link))
            index_items = html.findAll('div', attrs={'class': re.compile('index_item.+?')})
            title = 'watch' + clean_title(title).replace(": ", "")

            for index_item in index_items:
                try:
                    links = index_item.findAll('a')
                    for link in links:
                        href = link['href']
                        link_title = link['title']
                        try:
                            href = urlparse.parse_qs(urlparse.urlparse(href).query)['u'][0]
                        except:
                            pass
                        try:
                            href = urlparse.parse_qs(urlparse.urlparse(href).query)['q'][0]
                        except:
                            pass

                        if title == clean_title(link_title):  # href is the show page relative url
                            show_url = urlparse.urljoin(self.base_link, href)
                            html = BeautifulSoup(proxy.get(show_url, 'tv_episode_item'))

                            seasons = html.findAll('div', attrs={'class': 'show_season'})
                            for scraped_season in seasons:
                                if scraped_season['data-id'] == season:
                                    tv_episode_items = scraped_season.findAll('div', attrs={'class': 'tv_episode_item'})
                                    for tv_episode_item in tv_episode_items:
                                        links = tv_episode_item.findAll('a')
                                        for link in links:
                                            if link.contents[0].strip() == "E%s" % episode:
                                                episode_href = link['href']
                                                try:
                                                    episode_href = \
                                                        urlparse.parse_qs(urlparse.urlparse(episode_href).query)['u'][0]
                                                except:
                                                    pass
                                                try:
                                                    episode_href = \
                                                        urlparse.parse_qs(urlparse.urlparse(episode_href).query)['q'][0]
                                                except:
                                                    pass
                                                return self.sources(episode_href)
                except:
                    continue
        except:
            pass
        return []
Example #14
    def sources(self, url):
        sources = []
        try:
            if url == None: return sources
            absolute_url = urlparse.urljoin(self.base_link, url)
            html = BeautifulSoup(proxy.get(absolute_url, 'link_ite'))
            tables = html.findAll('table', attrs={'class': re.compile('link_ite.+?')})
            for table in tables:
                rows = table.findAll('tr')
                for row in rows:
                    link = row.findAll('a')[-1]
                    href = link['href']

                    if 'gtfo' not in href:
                        continue

                    try:
                        href = urlparse.parse_qs(urlparse.urlparse(href).query)['u'][0]
                    except:
                        pass
                    try:
                        href = urlparse.parse_qs(urlparse.urlparse(href).query)['q'][0]
                    except:
                        pass

                    href = base64.b64decode(urlparse.parse_qs(urlparse.urlparse(href).query)['gtfo'][0])
                    href = replaceHTMLCodes(href)

                    host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(href.strip().lower()).netloc)[0]
                    host = replaceHTMLCodes(host)
                    host = host.encode('utf-8')

                    if "qertewrt" in host:
                        continue

                    quality = row.findAll('div', attrs={'class': 'quality'})[0].text
                    if 'CAM' in quality or 'TS' in quality:
                        quality = 'CAM'
                    elif 'HD' not in quality:
                        # keep the scraped HD label; everything else is SD
                        quality = 'SD'

                    sources.append(
                        {'source': host, 'quality': quality, 'scraper': self.name, 'url': href, 'direct': False})
        except:
            pass

        return sources
Example #15
    def get_key(self):
        url = self.search_link
        html = proxy.get(url, 'searchform')
        parsed_html = BeautifulSoup(html)
        key = parsed_html.findAll('input', attrs={'name': 'key'})[0]["value"]
        return key
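The search key is just the value of a hidden <input name="key"> on the site's search form; get_key() pulls it out so get_html() can append it to the query string. A sketch against synthetic markup:

    from BeautifulSoup import BeautifulSoup

    form_html = BeautifulSoup('<form class="searchform">'
                              '<input type="hidden" name="key" value="a1b2c3"></form>')
    key = form_html.findAll('input', attrs={'name': 'key'})[0]['value']
    print key  # a1b2c3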
Example #17
    def scrape_episode(self, title, show_year, year, season, episode, imdb,
                       tvdb):
        try:
            html = BeautifulSoup(self.get_html(title, self.tvsearch_link))
            index_items = html.findAll(
                'div', attrs={'class': re.compile('index_item.+?')})
            title = 'watch' + clean_title(title).replace(": ", "")

            for index_item in index_items:
                try:
                    links = index_item.findAll('a')
                    for link in links:
                        href = link['href']
                        link_title = link['title']
                        try:
                            href = urlparse.parse_qs(
                                urlparse.urlparse(href).query)['u'][0]
                        except:
                            pass
                        try:
                            href = urlparse.parse_qs(
                                urlparse.urlparse(href).query)['q'][0]
                        except:
                            pass

                        if title == clean_title(link_title):  # href is the show page relative url
                            show_url = urlparse.urljoin(self.base_link, href)
                            html = BeautifulSoup(
                                proxy.get(show_url, 'tv_episode_item'))

                            seasons = html.findAll(
                                'div', attrs={'class': 'show_season'})
                            for scraped_season in seasons:
                                if scraped_season['data-id'] == season:
                                    tv_episode_items = scraped_season.findAll(
                                        'div',
                                        attrs={'class': 'tv_episode_item'})
                                    for tv_episode_item in tv_episode_items:
                                        links = tv_episode_item.findAll('a')
                                        for link in links:
                                            if link.contents[0].strip() == "E%s" % episode:
                                                episode_href = link['href']
                                                try:
                                                    episode_href = \
                                                        urlparse.parse_qs(urlparse.urlparse(episode_href).query)['u'][0]
                                                except:
                                                    pass
                                                try:
                                                    episode_href = \
                                                        urlparse.parse_qs(urlparse.urlparse(episode_href).query)['q'][0]
                                                except:
                                                    pass
                                                return self.sources(episode_href)
                except:
                    continue
        except:
            pass
        return []
Example #18
    def scrape_episode(self, title, show_year, year, season, episode, imdb,
                       tvdb):
        try:
            query = urlparse.urljoin(
                self.base_link, self.tvsearch_link %
                urllib.quote_plus(title.replace('\'', '').rsplit(':', 1)[0]))

            html = proxy.get(query, 'item')
            if 'page=2' in html or 'page%3D2' in html:
                html2 = proxy.get(query + '&page=2', 'item')
                html += html2

            html = BeautifulSoup(html)

            cleaned_title = 'watchputlocker' + clean_title(title)
            years = [
                '%s' % str(year),
                '%s' % str(int(year) + 1),
                '%s' % str(int(year) - 1)
            ]

            items = html.findAll('div', attrs={'class': 'item'})

            show_url = None
            for item in items:
                links = item.findAll('a')
                for link in links:
                    href = link['href']
                    link_title = link['title']
                    try:
                        href = urlparse.parse_qs(
                            urlparse.urlparse(href).query)['u'][0]
                    except:
                        pass
                    try:
                        href = urlparse.parse_qs(
                            urlparse.urlparse(href).query)['q'][0]
                    except:
                        pass
                    if cleaned_title == clean_title(link_title) and show_year in link_title:
                        url = re.findall('(?://.+?|)(/.+)', href)[0]
                        show_url = urlparse.urljoin(self.base_link,
                                                    replaceHTMLCodes(url))
                    else:
                        continue

                    html = BeautifulSoup(proxy.get(show_url,
                                                   'tv_episode_item'))
                    season_items = html.findAll('div',
                                                attrs={'class': 'show_season'})
                    for season_item in season_items:
                        if season_item["data-id"] != season:
                            continue
                        episode_items = season_item.findAll(
                            'div', attrs={'class': 'tv_episode_item'})
                        for episode_item in episode_items:
                            link = episode_item.findAll('a')[-1]
                            href = link["href"]
                            link_episode = link.contents[0].strip()
                            if link_episode != "E%s" % (episode):
                                continue
                            link_airdate = link.findAll(
                                'span', attrs={'class': 'tv_num_versions'})[-1]  # the last span holds the air date
                            link_airdate = link_airdate.contents[0]
                            if any(candidate_year in link_airdate
                                   for candidate_year in years):
                                return self.sources(href)

        except:
            pass
        return []
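All of these examples are Python 2 code (urllib.quote_plus, the urlparse module, the "except Exception, argument" syntax). Under Python 3 the same helpers live in urllib.parse; a sketch of the equivalents used throughout:

    from urllib.parse import quote_plus, urljoin, urlparse, parse_qs

    query = quote_plus("Some Show: Part 1".replace("'", "").rsplit(':', 1)[0])
    href = parse_qs(urlparse('/out?u=%2Fwatch%2F1').query).get('u', ['/fallback'])[0]
    url = urljoin('http://example.com/', href)   # 'http://example.com/watch/1'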