Exemplo n.º 1
0
    def get_data(self):
        """
        Fetch the page at ``self.url`` and collect its episode list.

        The parsed page is handed to ``self._scrape_metadata`` and then to
        ``self._scarpe_episodes``. If the instance has a ``_referer``
        attribute it is sent as the ``referer`` request header.

        Returns:
            A list of ``(episode_number, episode_id)`` tuples, numbered from
            1 in the order ``_scarpe_episodes`` returned the ids. The same
            list is stored in ``self._episode_urls`` and its length in
            ``self._len``.
        """
        headers = get_random_header()
        if hasattr(self, '_referer'):
            headers['referer'] = self._referer

        # Pass the headers assembled above. Requesting with a second, fresh
        # get_random_header() would silently drop the referer.
        r = scraper.get(self.url, headers=headers)
        soup = BeautifulSoup(r.text, 'html.parser')

        self._scrape_metadata(soup)

        episode_ids = self._scarpe_episodes(soup)
        self._len = len(episode_ids)

        logging.debug('EPISODE IDS: length: {}, ids: {}'.format(
            self._len, episode_ids))

        # Pair every id with its 1-based episode number.
        self._episode_urls = list(enumerate(episode_ids, start=1))

        return self._episode_urls
Exemplo n.º 2
0
    def search(cls, query):
        """
        Search kissanime.ru for ``query``.

        Posts the query to the site's anime search endpoint and turns the
        response page into ``SearchResult`` objects (``poster`` is always
        the empty string).

        Returns:
            A list of ``SearchResult``. It holds exactly one entry when the
            response page is not the "Find anime" listing page.
        """
        site_root = 'https://kissanime.ru'

        request_headers = get_random_header()
        request_headers['referer'] = 'http://kissanime.ru/'

        form = {
            'type': 'Anime',
            'keyword': query,
        }
        response = scraper.post(
            'http://kissanime.ru/Search/Anime',
            data=form,
            headers=request_headers,
        )
        page = BeautifulSoup(response.text, 'html.parser')

        # With a single match kissanime redirects straight to the anime
        # page, so there is no listing to walk; build the lone result from
        # the page's 'bigChar' link instead.
        page_title = page.title.text.strip().lower()
        if page_title != "find anime":
            anchor = page.find('a', 'bigChar')
            only_hit = SearchResult(
                title=anchor.text,
                url=site_root + anchor.get('href'),
                poster='',
            )
            return [only_hit]

        # Only the even-indexed <td> cells are used (presumably the title
        # column of a two-column table -- confirm against the live markup).
        title_cells = page.find_all('td')[::2]

        found = []
        for cell in title_cells:
            hit = SearchResult(
                title=cell.text.strip(),
                url=site_root + cell.find('a').get('href'),
                poster='',
            )
            logging.debug(hit)
            found.append(hit)

        return found
Exemplo n.º 3
0
    def search(cls, query):
        """
        Search kisscartoon.ac for ``query``.

        Every link inside the response page's ``.listing`` element becomes
        one ``SearchResult`` whose title is the link text without a leading
        ``'Watch '`` and whose url is the link's ``href`` (``poster`` is
        always the empty string).

        Returns:
            A list of ``SearchResult``; empty when the page has no
            ``.listing`` element.
        """
        headers = get_random_header()
        headers['referer'] = 'http://kisscartoon.ac/'
        response = scraper.get(
            'http://kisscartoon.ac/Search/',
            params={
                's': query,
            },
            headers=headers,
        )
        logging.debug('Result url: {}'.format(response.url))

        soup = BeautifulSoup(response.text, 'html.parser')

        listing = soup.select_one('.listing')
        if listing is None:
            # No listing on the page: report "no results" instead of
            # failing with AttributeError on None.
            return []

        prefix = 'Watch '
        ret = []
        for link in listing.find_all('a'):
            title = link.text.strip()
            # Remove the literal prefix only. str.strip('Watch ') treats its
            # argument as a set of characters and would also eat matching
            # letters from both ends of the real title.
            if title.startswith(prefix):
                title = title[len(prefix):]

            result = SearchResult(
                title=title,
                url=link.get('href'),
                poster='',
            )
            logging.debug(result)
            ret.append(result)

        return ret
Exemplo n.º 4
0
import os
import requests

from anime_downloader import session
from anime_downloader.const import get_random_header

# Names exported by a star-import of this module.
__all__ = [
    'get',
    'post',
    'soupify',
]

# NOTE(review): logging, cfscrape and tempfile are used below but do not
# appear in the import lines visible above -- confirm they are imported.
logger = logging.getLogger(__name__)

# Module-wide state, created once when the module is imported.
# Scraper session obtained from cfscrape.
cf_session = cfscrape.create_scraper()
# Header set drawn a single time from get_random_header().
default_headers = get_random_header()
# Fresh temporary directory whose name starts with 'animedl'; going by the
# log message below it presumably holds HTML files -- verify against the
# code that uses temp_dir.
temp_dir = tempfile.mkdtemp(prefix='animedl')
logger.debug(f"HTML file temp_dir: {temp_dir}")


def setup(func):
    """
    setup is a decorator which takes a function
    and converts it into a request method
    """
    def setup_func(url: str,
                   cf: bool = False,
                   sel: bool = False,
                   referer: str = None,
                   cache: bool = True,
                   headers=None,
Exemplo n.º 5
0
def driver_select():
    '''
    Build and return the selenium driver for the configured browser.

    The browser name, data directory, browser executable and driver binary
    are read from get_browser_config(), get_data_dir(),
    get_browser_executable() and get_driver_binary() respectively. Both
    supported browsers are started headless at 1920x1080 with a user agent
    taken from get_random_header().

    Returns:
        A webdriver.Firefox instance when the configured browser is
        'firefox', a webdriver.Chrome instance when it is 'chrome'.

    Raises:
        ValueError: if the configured browser is neither 'firefox' nor
            'chrome'.
    '''
    browser = get_browser_config()
    data_dir = get_data_dir()
    executable = get_browser_executable()
    binary = get_driver_binary()

    if browser == 'firefox':
        firefox_options = webdriver.FirefoxOptions()
        ops = [
            "--width=1920", "--height=1080",
            "-headless", "--log fatal"
        ]

        for option in ops:
            firefox_options.add_argument(option)

        firefox_profile = webdriver.FirefoxProfile()
        firefox_profile.set_preference(
            "general.useragent.override", get_random_header()['user-agent']
        )

        driver = webdriver.Firefox(
            # sets user-agent
            firefox_profile=firefox_profile,
            # sets various firefox settings
            options=firefox_options,
            # None unless a browser executable is configured
            firefox_binary=executable or None,
            # "geckodriver" unless a driver binary is configured
            executable_path=binary or "geckodriver",
            # an attempt at stopping selenium from printing a pile of
            # garbage to the console.
            service_log_path=os.path.devnull
        )

    elif browser == 'chrome':
        from selenium.webdriver.chrome.options import Options

        profile_path = os.path.join(data_dir, 'Selenium_chromium')
        chrome_options = Options()

        ops = [
            "--headless", "--disable-gpu", '--log-level=OFF',
            f"--user-data-dir={profile_path}", "--no-sandbox",
            "--window-size=1920,1080", f"user-agent={get_random_header()['user-agent']}"  # noqa
        ]

        for option in ops:
            chrome_options.add_argument(option)

        cap = None

        if executable:
            from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

            # Work on a copy: DesiredCapabilities.CHROME is a shared
            # class-level dict, and editing it in place would leak the
            # binary location into every later user of it.
            cap = DesiredCapabilities.CHROME.copy()
            cap['binary_location'] = executable

        driver = webdriver.Chrome(
            # sets user-agent, and various chrome settings
            options=chrome_options,
            # "chromedriver" unless a driver binary is configured
            executable_path=binary or "chromedriver",
            # None unless a browser executable is configured
            desired_capabilities=cap,
            # an attempt at stopping selenium from printing a pile of
            # garbage to the console.
            service_log_path=os.path.devnull
        )

    else:
        # Fail with a clear message instead of the UnboundLocalError that
        # returning an unassigned ``driver`` would raise.
        raise ValueError(
            f"Unsupported browser {browser!r}; expected 'firefox' or 'chrome'"
        )

    return driver