def sources(self, url):
    sources = []
    try:
        if url is None:
            return sources
        headers = {'User-Agent': random_agent()}
        html = BeautifulSoup(requests.get(url, headers=headers, timeout=30).content)
        r = html.findAll('div', attrs={'class': 'site'})
        for container in r:
            r_url = container.findAll('a')[0]['data-actuallink'].encode('utf-8')
            host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(r_url.strip().lower()).netloc)[0]
            host = replaceHTMLCodes(host)
            host = host.encode('utf-8')
            sources.append({
                'source': host,
                'quality': 'SD',
                'scraper': self.name,
                'url': r_url,
                'direct': False
            })
    except:
        pass
    return sources
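# --- Illustrative aside (not part of the scraper): how the host extraction in
# sources() above behaves. The sample link is fabricated; the urlparse/re calls
# mirror exactly what the method does. Standalone Python 2 snippet.
import re
import urlparse

sample_link = 'http://www.examplehost.com/embed/abc123'  # hypothetical link
netloc = urlparse.urlparse(sample_link.strip().lower()).netloc  # 'www.examplehost.com'
host = re.findall('([\w]+[.][\w]+)$', netloc)[0]
print(host)  # -> 'examplehost.com' (the trailing domain.tld pair of the netloc)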
def get(url, check, headers=None, data=None):
    if headers is None:
        headers = {'User-Agent': random_agent()}
    # Try the URL directly first.
    try:
        request = urllib2.Request(url, headers=headers, data=data)
        html = urllib2.urlopen(request, timeout=10).read()
        if check in str(html):
            return html
    except:
        pass
    # Fall back to fetching through a web proxy (two attempts).
    try:
        new_url = get_proxy_url() % urllib.quote_plus(url)
        headers['Referer'] = 'http://%s/' % urlparse.urlparse(new_url).netloc
        request = urllib2.Request(new_url, headers=headers)
        response = urllib2.urlopen(request, timeout=10)
        html = response.read()
        response.close()
        if check in html:
            return html
    except:
        pass
    try:
        new_url = get_proxy_url() % urllib.quote_plus(url)
        headers['Referer'] = 'http://%s/' % urlparse.urlparse(new_url).netloc
        request = urllib2.Request(new_url, headers=headers)
        html = urllib2.urlopen(request, timeout=10).read()
        if check in html:
            return html
    except:
        pass
    return None
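# --- Assumed contract (sketch, not the real implementation): get_proxy_url()
# is defined elsewhere in this module. The proxy fallbacks above only require
# that it return a format string with a single '%s' slot for the quoted target
# URL, e.g. something shaped like:
#
#     def get_proxy_url():
#         return 'http://someproxy.example/browse.php?u=%s'   # hypothetical value
#
# so that get_proxy_url() % urllib.quote_plus(url) yields a fetchable proxy URL.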
def scrape_movie(self, title, year, imdb):
    try:
        # print("ONEMOVIES")
        headers = {'User-Agent': random_agent()}
        # print("ONEMOVIES", headers)
        query = self.search_link % (urllib.quote_plus(title.replace("'", " ")))
        query = urlparse.urljoin(self.base_link, query)
        cleaned_title = clean_title(title)
        # print("ONEMOVIES", query)
        html = BeautifulSoup(requests.get(query, headers=headers, timeout=30).content)
        containers = html.findAll('div', attrs={'class': 'ml-item'})
        for result in containers:
            links = result.findAll('a')
            # print("ONEMOVIES", links)
            for link in links:
                link_title = str(link['title'])
                href = str(link['href'])
                info = str(link['data-url'])
                # print("ONEMOVIES", link_title, href, info)
                if clean_title(link_title) == cleaned_title:
                    html = requests.get(info, headers=headers).content
                    pattern = '<div class="jt-info">%s</div>' % year
                    match = re.findall(pattern, html)
                    if match:
                        # print("ONEMOVIES MATCH", href)
                        return self.sources(replaceHTMLCodes(href))
    except:
        pass
    return []
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        if imdb is None:
            # TODO get imdb from title
            return
        imdb_title = self.get_imdb_title(imdb)
        headers = {'User-Agent': random_agent()}
        show_url = urlparse.urljoin(
            self.base_link,
            self.tv_link % imdb_title.replace(": ", "-").replace(' ', '-').replace(':', '-')
        ).replace('\'', '').lower()
        show_url += "/"
        # remove accents
        show_url = str(''.join((c for c in unicodedata.normalize('NFD', show_url.decode("utf-8"))
                                if unicodedata.category(c) != 'Mn')))
        html = BeautifulSoup(requests.get(show_url, headers=headers).content)
        season_containers = html.findAll('div', attrs={'class': 'Season container clear'})
        for season_container in season_containers:
            try:
                links = season_container.findAll("a")
                for link in links:
                    try:
                        link_title = link.findAll("small")[0].text
                        if ('season %s' % season in link_title.lower()
                                and 'episode %s' % episode in link_title.lower()):
                            return self.sources(link["href"])
                    except:
                        continue
            except:
                continue
    except:
        pass
    return []
def scrape_movie(self, title, year, imdb):
    try:
        print("MOVIEXK")
        headers = {'User-Agent': random_agent()}
        query = self.search_link % (urllib.quote_plus(title) + "+" + str(year))
        query = urlparse.urljoin(self.base_link, query)
        cleaned_title = clean_title(title)
        html = BeautifulSoup(requests.get(query, headers=headers, timeout=30).content)
        containers = html.findAll('div', attrs={'class': 'inner'})
        for container in containers:
            print("MOVIEXK r1", container)
            movie_link = container.findAll('a')[0]
            r_href = movie_link['href']
            print("MOVIEXK r2", r_href)
            r_title = movie_link['title']
            link_year = container.findAll('span', attrs={'class': 'year'})[0].findAll('a')[0].text
            print("MOVIEXK r3", r_title)
            print("MOVIEXK RESULTS", r_title, r_href)
            if str(year) == link_year:
                if cleaned_title in clean_title(r_title):
                    redirect = requests.get(r_href, headers=headers, timeout=30).text
                    r_url = re.findall('<a href="(.*?)" class="btn-watch"', redirect)[0]
                    r_url = r_url.encode('utf-8')
                    print("MOVIEXK PLAY URL", r_url)
                    return self.sources(replaceHTMLCodes(r_url))
    except:
        pass
    return []
def sources(self, url):
    sources = []
    try:
        if url is None:
            return sources
        absolute_url = urlparse.urljoin(self.base_link, url)
        referer_url = url.replace('watching.html', '') + 'watching.html'
        headers = {'User-Agent': random_agent()}
        post = requests.get(absolute_url, headers=headers, timeout=30).content
        post = re.findall('movie=(\d+)', post)[0]
        post = {'id': post, 'episode_id': '0', 'link_id': '0', 'from': 'v3'}
        headers = {'X-Requested-With': 'XMLHttpRequest',
                   'Accept-Formating': 'application/json, text/javascript',
                   'Server': 'cloudflare-nginx',
                   'Referer': referer_url,
                   'User-Agent': random_agent()}
        load_episode_url = urlparse.urljoin(self.base_link, '/ajax/movie/load_episodes')
        html = BeautifulSoup(requests.post(load_episode_url, data=post, headers=headers).content)
        pattern = re.compile("load_player\(\s*'([^']+)'\s*,\s*'?(\d+)\s*'?")
        links = html.findAll('a', attrs={'onclick': pattern})
        for link in links:
            info = re.findall(pattern, link['onclick'])[0]  # (id, quality) quality can be 0
            try:
                play = urlparse.urljoin(self.base_link, '/ajax/movie/load_player_v2')
                post = {'id': info[0], 'quality': info[1]}
                player_url = requests.post(play, data=post, headers=headers).content
                json_url = json.loads(player_url)['link']
                response = proxy.get_raw(json_url, headers=headers)
                video_url = response.geturl()
                # The proxy wraps the real URL in a 'u' (or 'q') query parameter.
                try:
                    unproxied_video_url = urlparse.parse_qs(urlparse.urlparse(video_url).query)['u'][0]
                except:
                    pass
                try:
                    unproxied_video_url = urlparse.parse_qs(urlparse.urlparse(video_url).query)['q'][0]
                except:
                    pass
                if 'openload.' in unproxied_video_url:
                    sources.append({'source': 'openload.co', 'quality': 'HD',
                                    'scraper': self.name, 'url': unproxied_video_url,
                                    'direct': False})
                else:
                    sources.append({'source': 'google video',
                                    'quality': googletag(unproxied_video_url)[0]['quality'],
                                    'scraper': self.name, 'url': unproxied_video_url,
                                    'direct': True})
            except:
                continue
        return sources
    except:
        return sources
def scrape_movie(self, title, year, imdb):
    try:
        title = title.translate(None, '\/:*?"\'<>|!,').replace(' ', '-').replace('--', '-').lower()
        headers = {'User-Agent': random_agent()}
        search_url = urlparse.urljoin(self.base_link, self.moviesearch_hd_link % (title, year))
        html = None
        try:
            prehtml = self.scraper.get(search_url, headers=headers, timeout=30)
            if prehtml.status_code != 404:
                html = BeautifulSoup(prehtml.content)
        except:
            pass
        if html is None:
            search_url = urlparse.urljoin(self.base_link, self.moviesearch_sd_link % (title, year))
            html = BeautifulSoup(self.scraper.get(search_url, headers=headers, timeout=30).content)
        if html is None:
            raise Exception()
        return self.sources(search_url)
    except:
        pass
    return []
def sources(self, url):
    sources = []
    try:
        if url is None:
            return sources
        referer = urlparse.urljoin(self.base_link, url)
        headers = {'X-Requested-With': 'XMLHttpRequest',
                   'Referer': referer,
                   'User-Agent': random_agent()}
        post = urlparse.parse_qs(urlparse.urlparse(referer).query).values()[0][0]
        post = {'v': post}
        url = urlparse.urljoin(self.base_link, '/video_info/iframe')
        html = requests.post(url, data=post, headers=headers).content
        quality_url_pairs = re.findall('"(\d+)"\s*:\s*"([^"]+)', html)
        for pair in quality_url_pairs:
            quality = pair[0]
            url = urllib.unquote(pair[1].split('url=')[-1])
            sources.append({
                'source': 'google video',
                'quality': quality,
                'scraper': self.name,
                'url': url,
                'direct': True
            })
    except:
        pass
    return sources
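# --- Illustrative aside: the '"(\d+)"\s*:\s*"([^"]+)' pattern above expects a
# /video_info/iframe response shaped roughly like the fabricated payload below
# (quality label -> proxied URL). Standalone snippet; the payload is invented.
import re
import urllib

payload = '{"360":"http://proxy.example/?url=http%3A%2F%2Fvideo.example%2Fv.mp4"}'  # fabricated
for quality, link in re.findall('"(\d+)"\s*:\s*"([^"]+)', payload):
    print(quality, urllib.unquote(link.split('url=')[-1]))
# -> 360 http://video.example/v.mp4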
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb):
    try:
        headers = {'User-Agent': random_agent()}
        query = "%s+season+%s" % (urllib.quote_plus(title), season)
        query = self.search_link % query
        query = urlparse.urljoin(self.base_link, query)
        cleaned_title = clean_title(title)
        checkseason = cleaned_title + "season" + season
        # print("ONEMOVIES", query, checkseason)
        html = BeautifulSoup(requests.get(query, headers=headers, timeout=30).content)
        containers = html.findAll('div', attrs={'class': 'ml-item'})
        for result in containers:
            links = result.findAll('a')
            # print("ONEMOVIES", links)
            for link in links:
                link_title = str(link['title'])
                href = str(link['href'])
                # print("ONEMOVIES", link_title, href)
                if clean_title(link_title) == checkseason:
                    ep_id = '?episode=%01d' % int(episode)
                    href = href + ep_id
                    # print("ONEMOVIES Passed", href)
                    return self.sources(replaceHTMLCodes(href))
    except:
        pass
    return []
def sources(self, url):
    sources = []
    try:
        if url is None:
            return sources
        headers = {'User-Agent': random_agent()}
        html = BeautifulSoup(requests.get(url, headers=headers, timeout=30).content)
        r = html.findAll('source')
        for r_source in r:
            url = r_source['src'].encode('utf-8')
            if 'google' not in url:
                try:
                    req = requests.head(url, headers=headers)
                    if req.headers['Location'] != "":
                        url = req.headers['Location']
                        url = url.replace('https://', 'http://').replace(':443/', '/')
                except:
                    pass
            if 'google' in url:
                quality = r_source['data-res'].encode('utf-8')
                if "1080" in quality:
                    quality = "1080"
                elif "720" in quality:
                    quality = "720"
                else:
                    quality = "SD"
                print("MOVIEXK SOURCES", url, quality)
                sources.append({'source': 'google video', 'quality': quality,
                                'scraper': self.name, 'url': url, 'direct': True})
            else:
                sources.append({'source': 'moviexk', 'quality': 'SD',
                                'scraper': self.name, 'url': url, 'direct': True})
    except:
        pass
    return sources
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb):
    headers = {'User-Agent': random_agent()}
    q = (title.translate(None, '\/:*?"\'<>|!,')).replace(' ', '-').replace('--', '-').lower()
    query = urlparse.urljoin(self.base_link, self.tv_search_link % q)
    cleaned_title = clean_title(title)
    html = BeautifulSoup(requests.get(query, headers=headers, timeout=30).content)
    links = html.findAll('a', attrs={'class': 'top-h1'})
    show_url = None
    for link in links:
        link_title = link.text
        if cleaned_title == clean_title(link_title):
            show_url = link["href"]
            break
    if show_url:
        html = BeautifulSoup(requests.get(show_url, headers=headers, timeout=30).content)
        link_container = html.findAll("div", attrs={'class': 'bottom'})[-1]
        episode_links = link_container.findAll("a")
        episode_format1 = "S%02dE%02d" % (int(season), int(episode))
        episode_format2 = "S%02d-E%02d" % (int(season), int(episode))
        for episode_link in episode_links:
            button = episode_link.contents[0]
            episode_text = button.text
            if episode_format1 in episode_text or episode_format2 in episode_text:
                episode_url = episode_link["href"]
                return self.sources(episode_url, "SD")
    return []
def sources(self, url, quality):
    sources = []
    try:
        headers = {'User-Agent': random_agent()}
        song_id = re.findall('-(\d+).html', url)[0]
        query = self.sources_link % song_id
        query = urlparse.urljoin(self.base_link, query)
        # print("ONEMUSIC SONG ID", song_id, query)
        response = requests.get(query, headers=headers).content
        source_json = json.loads(response)
        songs_json = source_json['sources']
        for item in songs_json:
            hdmusic = item['link_320'].encode('utf-8').replace(' ', '%20')
            sdmusic = item['link_128'].encode('utf-8').replace(' ', '%20')
            if "/mobile/" not in hdmusic:
                sources.append({'source': 'mp3', 'quality': 'HD',
                                'scraper': self.name, 'url': hdmusic, 'direct': True})
            if "mobile" not in sdmusic:
                sources.append({'source': 'mp3', 'quality': 'SD',
                                'scraper': self.name, 'url': sdmusic, 'direct': True})
        # print("ONEMUSIC SOURCES", sources)
    except:
        pass
    return sources
def scrape_movie(self, title, year, imdb):
    try:
        headers = {'User-Agent': random_agent()}
        query = urlparse.urljoin(self.base_link, self.search_link)
        query = query % urllib.quote_plus(title)
        # print ("XMOVIES query", query)
        cleaned_title = clean_title(title)
        html = BeautifulSoup(requests.get(query, headers=headers, timeout=30).content)
        containers = html.findAll('div', attrs={'class': 'item_movie'})
        # print ("XMOVIES r1", containers)
        for container in containers:
            try:
                links = container.findAll('h2', attrs={'class': 'tit'})[0]
                r = links.findAll('a')
                for link in r:
                    link_title = link['title'].encode('utf-8')
                    href = link['href'].encode('utf-8')
                    if len(link_title) > 0 and len(href) > 0:
                        parsed = re.findall('(.+?) \((\d{4})', link_title)
                        parsed_title = parsed[0][0]
                        parsed_year = parsed[0][1]
                        if (cleaned_title.lower() == clean_title(parsed_title).lower()
                                and year == parsed_year):
                            if "http:" not in href:
                                href = "http:" + href
                            return self.sources(replaceHTMLCodes(href))
            except:
                pass
    except:
        pass
    return []
def sources(self, url):
    sources = []
    try:
        if url is None:
            return sources
        referer = urlparse.urljoin(self.base_link, url)
        headers = {'Referer': referer, 'User-Agent': random_agent()}
        html = requests.get(referer, headers=headers, timeout=30).content
        player_id = re.compile('var\s*view_id\s*=\s*"(\d*)"').findall(html)[0]
        player_url = self.player_link % player_id
        player_html = requests.get(player_url, headers=headers, timeout=30).content
        player_html_parsed = BeautifulSoup(player_html)
        try:
            video_url = player_html_parsed.findAll('iframe')[-1]['src']
            if 'openload' in video_url:
                host = 'openload.co'
                direct = False
                video_url = [{'url': video_url, 'quality': 'HD'}]
            elif 'ok.ru' in video_url:
                host = 'vk'
                direct = True
                video_url = odnoklassniki(video_url)
            elif 'vk.com' in video_url:
                host = 'vk'
                direct = True
                video_url = vk(video_url)
            else:
                raise Exception()
            for i in video_url:
                sources.append({'source': host, 'quality': i['quality'],
                                'scraper': self.name, 'url': i['url'], 'direct': direct})
        except:
            pass
        try:
            links = re.compile('"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?"').findall(player_html)
            for link in links:
                sources.append({'source': 'google video', 'quality': link[1],
                                'scraper': self.name, 'url': link[0], 'direct': True})
        except:
            pass
    except:
        pass
    return sources
def get_imdb_title(self, imdb):
    headers = {'User-Agent': random_agent(), 'Accept-Language': 'es-es'}
    html = BeautifulSoup(requests.get('http://www.imdb.com/title/%s' % imdb, headers=headers).content)
    html_title = html.findAll('title')[0].text.encode('utf-8')
    imdb_title = re.sub('(?:\(||\(TV Series\s|\s)\d{4}.+', '', html_title).strip()
    return imdb_title
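# --- Illustrative aside: what the re.sub in get_imdb_title() does to a
# typical IMDb <title> string (sample input fabricated for the example):
import re

html_title = 'The Matrix (1999) - IMDb'
print(re.sub('(?:\(||\(TV Series\s|\s)\d{4}.+', '', html_title).strip())
# -> 'The Matrix' (everything from the year onward is stripped)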
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb):
    try:
        print("MOVIEXK")
        headers = {'User-Agent': random_agent()}
        query = self.search_link % (urllib.quote_plus(title))
        query = urlparse.urljoin(self.base_link, query)
        cleaned_title = clean_title(title)
        ep_id = int(episode)
        season_id = int(season)
        html = BeautifulSoup(requests.get(query, headers=headers, timeout=30).content)
        containers = html.findAll('div', attrs={'class': 'inner'})
        for container in containers:
            print("MOVIEXK r1", container)
            show_link = container.findAll('a')[0]
            r_href = show_link['href']
            print("MOVIEXK r2", r_href)
            r_title = show_link['title']
            print("MOVIEXK r3", r_title)
            print("MOVIEXK r4", r_title, r_href)
            if cleaned_title in clean_title(r_title) and "tv" in r_title.lower():
                redirect = requests.get(r_href, headers=headers, timeout=30).text
                r_url = re.findall('<a href="(.*?)" class="btn-watch"', redirect)[0]
                r_url = r_url.encode('utf-8')
                links = BeautifulSoup(requests.get(r_url, headers=headers, timeout=30).content)
                ep_items = links.findAll('ul', attrs={'class': 'episodelist'})
                for items in ep_items:
                    ep_links = items.findAll('a')
                    for r in ep_links:
                        print("MOVIEXK r5", r)
                        ep_url = r['href'].encode('utf-8')
                        ep_title = r['title'].encode('utf-8')
                        print("MOVIEXK r6", ep_url, ep_title)
                        clean_ep_title = clean_title(ep_title)
                        if ("s%02de%02d" % (season_id, ep_id) in clean_ep_title
                                or "s%02d%02d" % (season_id, ep_id) in clean_ep_title
                                or "s%02d%d" % (season_id, ep_id) in clean_ep_title
                                or "epse%d%d" % (season_id, ep_id) in clean_ep_title):
                            return self.sources(replaceHTMLCodes(ep_url))
    except:
        pass
    return []
def Sources(self, url):
    sources = []
    try:
        for movielink, referer in self.url:
            try:
                # print("CMOVIES SOURCE LINKS", movielink)
                pages = requests.get(movielink).text
                scripts = re.findall('hash\s*:\s*"([^"]+)', pages)[0]
                # print("CMOVIES SERVER SCRIPT", scripts)
                if scripts:
                    token = self.__get_token()
                    key = hashlib.md5('(*&^%$#@!' + scripts[46:58]).hexdigest()
                    cookie = '%s=%s' % (key, token)
                    stream_url = self.stream_link % (scripts, hashlib.md5('!@#$%^&*(' + token).hexdigest())
                    # print("CMOVIES PLAYABLE LINKS", stream_url)
                    headers = {'Referer': referer,
                               'User-Agent': random_agent(),
                               'Cookie': cookie}
                    req = requests.get(stream_url, headers=headers, timeout=5).json()
                    playlist = req['playlist'][0]['sources']
                    # print playlist
                    for item in playlist:
                        url = item['file'].encode('utf-8')
                        r_quality = item['label'].encode('utf-8')
                        if r_quality in ['1080', '1080p', '1080P']:
                            quality = "1080p"
                        elif r_quality in ['720', '720p', '720P']:
                            quality = "HD"
                        else:
                            quality = "SD"
                        # print("CMOVIES playlist", quality, url)
                        sources.append({
                            'source': 'gvideo',
                            'quality': quality,
                            'scraper': 'Watch5s',
                            'url': url,
                            'direct': True
                        })
            except:
                pass
    except:
        pass
    return sources
def scrape_movie(self, title, year, imdb):
    try:
        headers = {'User-Agent': random_agent()}
        q = (title.translate(None, '\/:*?"\'<>|!,')).replace(' ', '-').replace('--', '-').lower()
        query = urlparse.urljoin(self.base_link, self.movie_search_link % q)
        cleaned_title = clean_title(title)
        html = requests.get(query, headers=headers, timeout=30).content
        containers = re.compile('<a class="top-item".*href="(.*?)"><cite>(.*?)</cite></a>').findall(html)
        for href, link_title in containers:
            parsed = re.findall('(.+?) \((\d{4})', link_title)
            parsed_title = parsed[0][0]
            parsed_year = parsed[0][1]
            if cleaned_title == clean_title(parsed_title) and year == parsed_year:
                try:
                    headers = {'User-Agent': random_agent()}
                    html = requests.get(href, headers=headers, timeout=30).content
                    parsed_html = BeautifulSoup(html)
                    quality_title = parsed_html.findAll("h3", attrs={'title': re.compile("Quality of ")})[0]
                    quality = quality_title.findAll('span')[0].text
                    match = re.search('href="([^"]+-full-movie-[^"]+)', html)
                    if match:
                        url = match.group(1)
                        return self.sources(url, "SD")
                except:
                    pass
    except:
        pass
    return []
def get_raw(url, headers=None, data=None):
    if headers is None:
        headers = {'User-Agent': random_agent()}
    try:
        new_url = get_proxy_url() % urllib.quote_plus(url)
        headers['Referer'] = 'http://%s/' % urlparse.urlparse(new_url).netloc
        request = urllib2.Request(new_url, headers=headers)
        response = urllib2.urlopen(request, timeout=10)
        return response
    except:
        pass
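# --- Hedged usage sketch: get_raw() returns the raw urllib2 response (or None
# on failure) rather than the body, so callers can read the post-redirect URL,
# as the proxy.get_raw(...).geturl() call in the load_player sources() method
# above does. The URL below is hypothetical:
#
#     response = get_raw('http://example.com/video')
#     if response is not None:
#         final_url = response.geturl()  # URL after the proxy's redirects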
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb):
    try:
        headers = {'User-Agent': random_agent()}
        query = self.search_link % (urllib.quote_plus(title))
        query = urlparse.urljoin(self.base_link, query)
        cleaned_title = clean_title(title)
        ep_id = int(episode)
        season_id = int(season)
        html = requests.get(query, headers=headers, timeout=30).json()
        results = html['series']
        for item in results:
            r_title = item['label'].encode('utf-8')
            r_link = item['seo'].encode('utf-8')
            if cleaned_title == clean_title(r_title):
                r_page = self.base_link + "/" + r_link
                # print("WATCHEPISODES r1", r_title, r_page)
                r_html = BeautifulSoup(requests.get(r_page, headers=headers, timeout=30).content)
                r = r_html.findAll('div', attrs={'class': re.compile('\s*el-item\s*')})
                for container in r:
                    try:
                        r_href = container.findAll('a')[0]['href'].encode('utf-8')
                        r_title = container.findAll('a')[0]['title'].encode('utf-8')
                        # print("WATCHEPISODES r3", r_href, r_title)
                        episode_check = "[sS]%02d[eE]%02d" % (int(season), int(episode))
                        match = re.search(episode_check, r_title)
                        if match:
                            # print("WATCHEPISODES PASSED EPISODE", r_href)
                            return self.sources(replaceHTMLCodes(r_href))
                        match2 = re.search(episode_check, r_href)
                        if match2:
                            # print("WATCHEPISODES PASSED EPISODE", r_href)
                            return self.sources(replaceHTMLCodes(r_href))
                    except:
                        pass
    except:
        pass
    return []
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb):
    try:
        for try_year in [str(year), str(int(year) - 1)]:
            tvshowtitle = '%s %s: Season %s' % (title, try_year, season)
            headers = {'X-Requested-With': 'XMLHttpRequest', 'User-Agent': random_agent()}
            post = {
                'aspp': tvshowtitle,
                'action': 'ajaxsearchpro_search',
                'options': 'qtranslate_lang=0&set_exactonly=checked&set_intitle=None&customset[]=post',
                'asid': '4',
                'asp_inst_id': '4_1'
            }
            url = urlparse.urljoin(self.base_link, self.tvsearch_link)
            html = BeautifulSoup(self.scraper.post(url, data=post, headers=headers, timeout=30).content)
            links = html.findAll('a', attrs={'class': 'asp_res_url'})
            show_url = None
            for link in links:
                href = link["href"]
                link_tvshowtitle = re.findall('(.+?: Season \d+)', link.contents[0].strip())[0]
                if (title.lower() in link_tvshowtitle.lower()
                        and str(season) in link_tvshowtitle
                        and try_year in link_tvshowtitle):
                    show_url = href
                    break
            if show_url is None:
                continue
            episode_url = show_url + '?episode=%01d' % int(episode)
            return self.sources(episode_url)
    except:
        pass
    return []
def scrape_movie(self, title, year, imdb):
    try:
        headers = {'User-Agent': random_agent()}
        query = self.search_link % (urllib.quote_plus(title))
        query = urlparse.urljoin(self.base_link, query)
        cleaned_title = clean_title(title)
        html = BeautifulSoup(requests.get(query, headers=headers, timeout=30).content)
        containers = html.findAll('div', attrs={'class': 'cell_container'})
        for container in containers:
            links = container.findAll('a')
            for link in links:
                link_title = link['title']
                href = link['href']
                if len(link_title) > 0 and len(href) > 0:
                    parsed = re.findall('(.+?) \((\d{4})', link_title)
                    parsed_title = parsed[0][0]
                    parsed_year = parsed[0][1]
                    if cleaned_title == clean_title(parsed_title) and year == parsed_year:
                        return self.sources(replaceHTMLCodes(href))
    except:
        pass
    return []
def sources(self, url):
    # print '::::::::::::::' + url
    sources = []
    try:
        if url is None:
            return sources
        count = 0
        headers = {'User-Agent': random_agent()}
        html = requests.get(url, headers=headers, timeout=30).content
        r = re.compile('<div class="ll-item">.+?<a href="(.+?)"', re.DOTALL).findall(html)
        for url in r:
            if count >= 10:  # cap the number of host pages fetched
                break
            count += 1
            page = requests.get(url).content
            host_urls = re.compile('<div class="wb-main">.+?<a rel="nofollow" target="_blank" href="(.+?)"',
                                   re.DOTALL).findall(page)
            for final_url in host_urls:
                holster = final_url.split('//')[1].replace('www.', '')
                holster = holster.split('/')[0].split('.')[0].title()
                sources.append({'source': holster, 'quality': 'SD',
                                'scraper': self.name, 'url': final_url, 'direct': False})
    except:
        pass
    return sources
def scrape_movie(self, title, year, imdb):
    try:
        # print("MOVIEGO INIT")
        headers = {'User-Agent': random_agent()}
        searchquery = self.search_link % (urllib.quote_plus(title), year)
        query = urlparse.urljoin(self.base_link, searchquery)
        cleaned_title = clean_title(title)
        html = BeautifulSoup(requests.get(query, headers=headers).content)
        containers = html.findAll('div', attrs={'class': 'short_content'})
        # print("MOVIEGO MOVIES", containers)
        for items in containers:
            href = items.findAll('a')[0]['href']
            result_title = items.findAll('div', attrs={'class': 'short_header'})[0]
            if year in str(result_title):
                result_title = normalize(str(result_title))
                if result_title == cleaned_title:
                    return self.sources(replaceHTMLCodes(href))
    except:
        pass
    return []
def scrape_music(self, title, artist, debrid=False):
    try:
        # print("ONEMUSIC")
        headers = {'User-Agent': random_agent()}
        query = self.search_link % (urllib.quote_plus(title.replace("'", "")))
        query = urlparse.urljoin(self.base_link, query)
        # print("ONEMUSIC", query)
        artist_name = clean_title(artist)
        song_name = clean_title(title)
        # print("ONEMUSIC ARTIST", artist_name)
        html = BeautifulSoup(requests.get(query, headers=headers, timeout=30).content)
        self.musiclist = []
        containers = html.findAll('div', attrs={'class': 'sr-songs-list'})
        for blocks in containers:
            song_block = blocks.findAll('div', attrs={'class': 'item-caption'})
            for item in song_block:
                href = item.findAll('a')[0]['href'].encode('utf-8')
                song_title = item.findAll('a')[0]['title'].encode('utf-8')
                if clean_title(song_title) == song_name:
                    artist_block = item.findAll('span', attrs={'class': 'singer'})[0]
                    artist = artist_block.findAll('a')[0]['title'].encode('utf-8')
                    artist = clean_title(artist)
                    print("ONEMUSIC", href, song_title, artist_name)
                    if artist == artist_name:
                        print("ONEMUSIC PASSED", href, song_title, artist)
                        return self.sources(href, "HD")
    except:
        pass
    return []
import xbmc
import json
import re
import urllib
import urlparse

import requests
from BeautifulSoup import BeautifulSoup as BS

from nanscrapers.common import clean_title, random_agent, replaceHTMLCodes
from ..scraper import Scraper

session = requests.Session()
headers = {"User-Agent": random_agent()}


class BeeMP3(Scraper):
    domains = ['beemp3']
    name = "BeeMP3"

    def __init__(self):
        self.base_link = 'https://beemp3.unblocked.bid'
        self.search_link = '/search?query=%s&field=artist'

    def scrape_music(self, title, artist, debrid=False):
        try:
            query = self.search_link % (urllib.quote_plus(artist))
            query = urlparse.urljoin(self.base_link, query)
            html = BS(session.get(query, headers=headers).content)
            result = self.process_results_page(html, title, artist, query)
            if result:
                return result
        except:
            pass
        return []
def sources(self, url):
    sources = []
    try:
        if url is None:
            return sources
        absolute_url = urlparse.urljoin(self.base_link, url)
        headers = {'User-Agent': random_agent()}
        html = BeautifulSoup(requests.get(absolute_url, headers=headers, timeout=30).content)
        pages = []
        embed = html.findAll('div', attrs={'id': 'embed'})[0]
        pages.append(embed.findAll('iframe')[0]["src"])
        for page in pages:
            try:
                if not page.startswith('http'):
                    page = 'http:%s' % page
                html = BeautifulSoup(requests.get(page, headers=headers, timeout=30).content)
                # captions = html.findAll(text=re.compile('kind\s*:\s*(?:\'|\")captions(?:\'|\")'))
                # if not captions: break
                # pcloud-hosted variants
                try:
                    link_text = html.findAll(text=re.compile('url\s*:\s*\'(http(?:s|)://api.pcloud.com/.+?)\''))[0]
                    link = re.findall('url\s*:\s*\'(http(?:s|)://api.pcloud.com/.+?)\'', link_text)[0]
                    variants = json.loads(requests.get(link, headers=headers, timeout=30).content)['variants']
                    for variant in variants:
                        if 'hosts' in variant and 'path' in variant and 'height' in variant:
                            video_url = '%s%s' % (variant['hosts'][0], variant['path'])
                            height = variant['height']
                            if not video_url.startswith('http'):
                                video_url = 'http://%s' % video_url
                            sources.append({'source': 'cdn', 'quality': str(height),
                                            'scraper': self.name, 'url': video_url, 'direct': False})
                except:
                    pass
                # JW Player style file/label pairs
                try:
                    links_text = html.findAll(
                        text=re.compile('"?file"?\s*:\s*"(.+?)"\s*,\s*"?label"?\s*:\s*"(.+?)"'))
                    for link_text in links_text:
                        try:
                            links = re.findall('"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?[^"]*"',
                                               link_text)
                            for link in links:
                                video_url = link[0]
                                if not video_url.startswith('http'):
                                    video_url = 'http:%s' % video_url
                                try:
                                    req = requests.head(video_url, headers=headers)
                                    if req.headers['Location'] != "":
                                        video_url = req.headers['Location']
                                except:
                                    pass
                                quality = link[1]
                                sources.append({'source': 'google video', 'quality': quality,
                                                'scraper': self.name, 'url': video_url, 'direct': True})
                        except:
                            continue
                except:
                    pass
            except:
                pass
    except:
        pass
    return sources
import re
import time

import requests
import xbmc
import xbmcaddon

from nanscrapers.common import clean_title, clean_search, random_agent, send_log, error_log
from ..scraper import Scraper

dev_log = xbmcaddon.Addon('script.module.nanscrapers').getSetting("dev_log")
headers = {"User-Agent": random_agent()}


class freemusic(Scraper):
    domains = ['freemusicdownloads']
    name = "Freemusic"
    sources = []

    def __init__(self):
        self.base_link = 'http://down.freemusicdownloads.world/'
        self.sources = []
        if dev_log == 'true':
            self.start_time = time.time()

    def scrape_music(self, title, artist, debrid=False):
        try:
            song_search = clean_title(title.lower()).replace(' ', '+')
            artist_search = clean_title(artist.lower()).replace(' ', '+')
            start_url = '%sresults?search_query=%s+%s' % (self.base_link, artist_search, song_search)
            html = requests.get(start_url, headers=headers, timeout=20).content
            match = re.compile('<h4 class="card-title">.+?</i>(.+?)</h4>.+?id="(.+?)"', re.DOTALL).findall(html)
            count = 0
            for m, link in match:
def sources(self, url):
    sources = []
    try:
        # print("ONEMOVIES SOURCES", url)
        if url is None:
            return sources
        referer = url
        headers = {'User-Agent': random_agent()}
        url = url.replace('/watching.html', '')
        html = requests.get(url, headers=headers).content
        try:
            url, episode = re.findall('(.+?)\?episode=(\d*)$', url)[0]
        except:
            episode = None
        vid_id = re.findall('-(\d+)', url)[-1]
        # print ("ONEMOVIES", vid_id)
        quality = re.findall('<span class="quality">(.*?)</span>', html)
        quality = quality[0].lower() if quality else ''
        if quality == 'cam' or quality == 'ts':
            quality = 'CAM'
        elif quality == 'hd':
            quality = '720'
        else:
            quality = '480'
        try:
            headers = {'X-Requested-With': 'XMLHttpRequest',
                       'Referer': referer,
                       'User-Agent': random_agent()}
            u = urlparse.urljoin(self.base_link, self.server_link % vid_id)
            # print("SERVERS", u)
            r = BeautifulSoup(requests.get(u, headers=headers).content)
            # print("SERVERS", r)
            containers = r.findAll('div', attrs={'class': 'les-content'})
            for result in containers:
                links = result.findAll('a')
                # print("ONEMOVIES", links)
                for link in links:
                    title = str(link['title'])
                    # print("ONEMOVIES TITLE", title)
                    if episode is not None:
                        title = re.findall('Episode\s+(\d+):', title)[0]
                        title = '%01d' % int(title)
                        if title == episode:
                            episode_id = str(link['episode-id'])
                            # print("ONEMOVIES EPISODE", episode_id)
                        else:
                            continue
                    else:
                        episode_id = str(link['episode-id'])
                    onclick = str(link['onclick'])
                    key_gen = ''.join(random.choice(string.ascii_lowercase + string.digits) for x in range(16))
                    ################# FIX FROM MUCKY DUCK & XUNITY TALK ################
                    key = '87wwxtp3dqii'
                    key2 = '7bcq9826avrbi6m49vd7shxkn985mhod'
                    cookie = hashlib.md5(episode_id + key).hexdigest() + '=%s' % key_gen
                    a = episode_id + key2
                    b = key_gen
                    i = b[-1]
                    h = b[:-1]
                    b = i + h + i + h + i + h
                    hash_id = uncensored(a, b)
                    ################# FIX FROM MUCKY DUCK & XUNITY TALK ################
                    serverurl = self.base_link + '/ajax/v2_get_sources/' + episode_id + '?hash=' + urllib.quote(hash_id)
                    # print ("playurl ONEMOVIES", serverurl)
                    headers = {'Accept-Language': 'en-US',
                               'Cookie': cookie,
                               'Referer': referer,
                               'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36',
                               'X-Requested-With': 'XMLHttpRequest'}
                    result = requests.get(serverurl, headers=headers).content
                    result = result.replace('\\', '')
                    # print ("ONEMOVIES Result", result)
                    url = re.findall('"?file"?\s*:\s*"(.+?)"', result)
                    url = [googletag(i) for i in url]
                    url = [i[0] for i in url if len(i) > 0]
                    u = []
                    try:
                        u += [[i for i in url if i['quality'] == '1080p'][0]]
                    except:
                        pass
                    try:
                        u += [[i for i in url if i['quality'] == '720'][0]]
                    except:
                        pass
                    try:
                        u += [[i for i in url if i['quality'] == '480'][0]]
                    except:
                        pass
                    url = replaceHTMLCodes(u[0]['url'])
                    quality = googletag(url)[0]['quality']
                    # print ("ONEMOVIES PLAY URL", quality, url)
                    sources.append({'source': 'google video', 'quality': quality,
                                    'scraper': self.name, 'url': url, 'direct': True})
        except:
            pass
    except:
        pass
    return sources
def sources(self, url):
    sources = []
    try:
        if url is None:
            return sources
        if self.base_link not in url:
            url = urlparse.urljoin(self.base_link, url)
        content = re.compile('(.+?)\?episode=\d*$').findall(url)
        video_type = 'movie' if len(content) == 0 else 'episode'
        try:
            url, episode = re.compile('(.+?)\?episode=(\d*)$').findall(url)[0]
        except:
            pass
        headers = {'User-Agent': random_agent()}
        html = self.scraper.get(url, headers=headers, timeout=30).content
        try:
            compressedstream = StringIO.StringIO(html)
            html = gzip.GzipFile(fileobj=compressedstream).read()
            html = BeautifulSoup(html)
        except:
            html = BeautifulSoup(html)
        links = html.findAll('a', attrs={'target': 'EZWebPlayer'})
        for link in links:
            href = replaceHTMLCodes(link['href'])
            if "get.php" not in href:
                continue
            if video_type == 'episode':
                link_episode_number = re.compile('(\d+)').findall(link.string)
                if len(link_episode_number) > 0:
                    link_episode_number = link_episode_number[-1]
                    if not link_episode_number == '%01d' % int(episode):
                        continue
            referer = url
            headers = {'User-Agent': random_agent(), 'Referer': referer}
            html = self.scraper.get(href, headers=headers, timeout=30).content
            source = re.findall('sources\s*:\s*\[(.+?)\]', html)[0]
            files = re.findall('"file"\s*:\s*"(.+?)".+?"label"\s*:\s*"(.+?)"', source)
            if files:
                quality_url_pairs = [{'url': file[0], 'quality': file[1][:-1]} for file in files]
            else:
                files = re.findall('"file"\s*:\s*"(.+?)".+?}', source)
                quality_url_pairs = [{'url': file, 'quality': "SD"} for file in files]
            for pair in quality_url_pairs:
                sources.append({'source': 'google video', 'quality': pair['quality'],
                                'scraper': self.name, 'url': pair['url'], 'direct': True})
    except:
        pass
    return sources
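# --- Illustrative aside: the file/label extraction above applied to a
# fabricated EZWebPlayer-style sources blob (not real site output):
import re

source = '{"file":"http://video.example/a.mp4","label":"720p"},{"file":"http://video.example/b.mp4","label":"360p"}'
files = re.findall('"file"\s*:\s*"(.+?)".+?"label"\s*:\s*"(.+?)"', source)
print(files)  # [('http://video.example/a.mp4', '720p'), ('http://video.example/b.mp4', '360p')]
# sources() then drops the trailing 'p' via file[1][:-1] to get '720' / '360'.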
def sources(self, url):
    sources = []
    alt_links = []
    play_links = []
    try:
        if url is None:
            return sources
        headers = {'User-Agent': random_agent()}
        mainpage = requests.get(url, headers=headers).content
        html = BeautifulSoup(mainpage)
        try:
            film_quality = re.findall('<div class="poster-qulabel">(.*?)</div>', mainpage)[0]
            print("MOVIEGO film_quality", film_quality)
            if "1080" in film_quality:
                quality = "1080"
            elif "720" in film_quality:
                quality = "720"
            else:
                quality = "SD"
            url = re.findall('file:\s+"([^"]+)"', mainpage)[0]
            url = url.encode('utf-8')
            sources.append({'source': 'CDN', 'quality': quality,
                            'scraper': self.name, 'url': url, 'direct': True})
        except:
            pass
        iframe = html.findAll("iframe")[0]
        original_frame = iframe['src']
        iframe_html = BeautifulSoup(requests.get(iframe["src"], headers=headers).content)
        scripts = iframe_html.findAll("script")
        unpacked_script = ""
        for script in scripts:
            try:
                unpacked_script += unpack(script.text)
            except:
                pass
        try:
            alternative_links = re.findall('Alternative (\d+)<', unpacked_script)
            for alts in alternative_links:
                alt_links.append(alts)
        except:
            pass
        # print ("MOVIEGO ALTS", alt_links)
        links = re.findall('<source src="(.*?)"', unpacked_script)
        if links:
            for link_url in links:
                if "google" in link_url:
                    play_links.append(link_url.replace(' ', ''))
    except:
        pass
    try:
        for ids in alt_links:
            headers = {'User-Agent': random_agent()}
            alt_frames = original_frame + "?source=a" + ids
            alt_iframe_html = BeautifulSoup(requests.get(alt_frames, headers=headers).content)
            alt_scripts = alt_iframe_html.findAll("script")
            unpacked_script = ""
            for script in alt_scripts:
                try:
                    unpacked_script += unpack(script.text)
                except:
                    pass
            links = re.findall('<source src="(.*?)"', unpacked_script)
            if links:
                for link_url in links:
                    if "google" in link_url:
                        play_links.append(link_url.replace(' ', ''))
    except:
        pass
    ############# DUPLICATES CHECK ################
    try:
        seen = []
        for url in play_links:
            if url not in seen:
                seen.append(url)
                print("MOVIEGO PLAY url", url)
                quality = googletag(url)[0]['quality']
                url = url.encode('utf-8')
                sources.append({'source': 'google video', 'quality': quality,
                                'scraper': self.name, 'url': url, 'direct': True})
    except:
        pass
    return sources
def sources(self, url, quality):
    sources = []
    try:
        headers = {'User-Agent': random_agent(),
                   'X-Requested-With': 'XMLHttpRequest',
                   'Referer': url}
        html = BeautifulSoup(requests.get(url, headers=headers, timeout=30).content)
        servers = html.findAll("span", attrs={'class': re.compile(".*?btn-eps.*?")})
        for server in servers:
            try:
                server_url = '/demo.php?v=%s' % server["link"]
                server_url = urlparse.urljoin(self.base_link, server_url)
                server_html = requests.get(server_url, headers=headers, timeout=30).content
                links = []
                try:
                    links.extend(re.findall(r'sources: \[ \{file: "(.*?)"', server_html, re.I | re.DOTALL))
                except:
                    pass
                try:
                    links.extend(re.findall(r'<source.*?src="(.*?)"', server_html, re.I | re.DOTALL))
                except:
                    pass
                try:
                    links.extend(re.findall(r'<iframe.*?src="(.*?)"', server_html, re.I | re.DOTALL))
                except:
                    pass
                for link in links:
                    try:
                        link_source = link.replace('../view.php?', 'view.php?').replace('./view.php?', 'view.php?')
                        if not link_source.startswith('http'):
                            link_source = urlparse.urljoin(self.base_link, link_source)
                        if "m4u" in link_source:
                            try:
                                req = requests.head(link_source, headers=headers)
                                if req.headers['Location'] != "":
                                    link_source = req.headers['Location']
                            except:
                                pass
                        if 'google' in link_source:
                            quality = googletag(link_source)[0]['quality']
                            sources.append({'source': 'google video', 'quality': quality,
                                            'scraper': self.name, 'url': link_source, 'direct': True})
                        elif 'openload.co' in link_source:
                            sources.append({'source': 'openload.co', 'quality': quality,
                                            'scraper': self.name, 'url': link_source, 'direct': False})
                        else:
                            sources.append({'source': 'M4U', 'quality': quality,
                                            'scraper': self.name, 'url': link_source, 'direct': True})
                    except:
                        continue
            except:
                continue
    except:
        pass
    return sources
def sources(self, url):
    sources = []
    try:
        if not url.startswith('http://'):
            url = urlparse.urljoin(self.base_link, url)
        headers = {'User-Agent': random_agent()}
        html = BeautifulSoup(requests.get(url, headers=headers).content)
        headers['Referer'] = url
        player_iframe_url = html.findAll("iframe")[0]["src"]
        html = BeautifulSoup(requests.get(player_iframe_url, headers=headers).content)
        buttons = html.findAll('div', attrs={'id': 'botones'})[0]
        player_links = buttons.findAll('a')
        for player_link in player_links:
            try:
                href = player_link["href"]
                if "thevideos.tv" in href:
                    sources.append({'source': 'thevideos.tv', 'quality': 'SD',
                                    'scraper': self.name, 'url': href, 'direct': False})
                    continue
                elif "openload.co" in href:
                    sources.append({'source': 'openload.co', 'quality': 'SD',
                                    'scraper': self.name, 'url': href, 'direct': False})
                    continue
                elif "pelispedia" in href:
                    headers["Referer"] = player_iframe_url
                    html = requests.get(href, headers=headers).content
                    try:
                        html_sources = re.findall('sources\s*:\s*\[(.+?)\]', html)
                        for source in html_sources:
                            files = re.findall('"file"\s*:\s*"(.+?)"', source)
                            for file in files:
                                file = file.split()[0].replace('\\/', '/')
                                sources.append({'source': 'google video',
                                                'quality': googletag(file)[0]['quality'],
                                                'scraper': self.name, 'url': file, 'direct': True})
                    except:
                        pass
                    try:
                        headers["Referer"] = href
                        headers['X-Requested-With'] = 'XMLHttpRequest'
                        gks_url = urlparse.urljoin(self.base_link, '/Pe_flv_flsh/plugins/gkpluginsphp.php')
                        post = {'link': re.findall('gkpluginsphp.*?link\s*:\s*"([^"]+)', html)[0]}
                        episode_link = json.loads(requests.post(gks_url, data=post, headers=headers).content)['link']
                        sources.append({'source': 'google video', 'quality': 'SD',
                                        'scraper': self.name, 'url': episode_link, 'direct': True})
                    except:
                        pass
                    try:
                        headers['X-Requested-With'] = 'XMLHttpRequest'
                        post_parameters = re.findall('var\s+parametros\s*=\s*"([^"]+)', html)[0]
                        post_pic = urlparse.parse_qs(urlparse.urlparse(post_parameters).query)['pic'][0]
                        post = {'sou': 'pic', 'fv': '21', 'url': post_pic}
                        protected_url = urlparse.urljoin(self.base_link, '/Pe_Player_Html5/pk/pk/plugins/protected.php')
                        episode_link = json.loads(requests.post(protected_url, data=post, headers=headers).content)[0]["link"]
                        sources.append({'source': 'cdn', 'quality': 'SD',
                                        'scraper': self.name, 'url': episode_link, 'direct': True})
                    except:
                        pass
            except:
                continue
        return sources
    except:
        pass
    return sources