Python GZipProcessor примеры использования

Язык программирования: Python

Пространство имен/Пакет: gziphttp

Класс/Тип: GZipProcessor

Примеров на hotexamples.com: 5

Python GZipProcessor — найдено 5 примеров. Это лучшие примеры кода на Python для gziphttp.GZipProcessor, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

GZipProcessor(5)

Основные методы

GZipProcessor (5)

Пример #1

Показать файл

Файл: geturls.py Проект: theit8514/FanFicFare

def get_urls_from_page(url, configuration=None, normalize=False):
    """Download the page at ``url`` and extract URLs from its HTML.

    An adapter is looked up for ``url`` (with ``anyurl=True``) so the fetch
    goes through the adapter's own ``_fetchUrl``.  When no adapter is known
    for the site (``UnknownSite``), the page is fetched with a plain
    cookie-aware, gzip-aware opener instead.

    Parameters:
        url: address of the page to fetch.
        configuration: configuration object handed to the adapter lookup and
            to ``get_urls_from_html``.  When falsy, a lightweight
            ``Configuration(["test1.com"], "EPUB")`` is created.
        normalize: passed through unchanged to ``get_urls_from_html``.

    Returns:
        Whatever ``get_urls_from_html`` returns for the downloaded data.
    """
    # Local import keeps this function self-contained with respect to the
    # module's import header.
    from contextlib import closing

    if not configuration:
        configuration = Configuration(["test1.com"], "EPUB", lightweight=True)

    data = None
    adapter = None
    try:
        adapter = adapters.getAdapter(configuration, url, anyurl=True)

        # special stuff to log into archiveofourown.org, if possible.
        # Unlike most that show the links to 'adult' stories, but protect
        # them, AO3 doesn't even show them if not logged in.  Only works
        # with saved user/pass--not going to prompt for list.
        if 'archiveofourown.org' in url:
            if adapter.getConfig("username"):
                if adapter.getConfig("is_adult"):
                    if '?' in url:
                        addurl = "&view_adult=true"
                    else:
                        addurl = "?view_adult=true"
                else:
                    addurl = ""
                # just to get an authenticity_token.
                data = adapter._fetchUrl(url + addurl)
                # login the session.
                adapter.performLogin(url, data)
                # the list page itself is fetched with the logged in
                # session by the final _fetchUrl call below.

        if 'fimfiction.net' in url and adapter.getConfig("is_adult"):
            data = adapter._fetchUrl(url)
            adapter.set_adult_cookie()

        if 'tthfanfic.org' in url and adapter.getConfig("is_adult"):
            ## Simple fetch works in testing, but actual pages use a
            ## POST and has a 'ctkn' value, so we do too.
            # adapter._fetchUrl("https://www.tthfanfic.org/setmaxrating.php?sitemaxrating=5")
            adapter.setSiteMaxRating(url)

        # this way it uses User-Agent or other special settings.
        data = adapter._fetchUrl(url, usecache=False)
    except UnknownSite:
        # no adapter with anyurl=True, must be a random site.
        opener = u2.build_opener(u2.HTTPCookieProcessor(), GZipProcessor())
        # closing() releases the HTTP response even if read() raises;
        # previously the response object was never closed.
        with closing(opener.open(url)) as response:
            data = response.read()

    # kludge because I don't see it on enough sites to be worth generalizing yet.
    restrictsearch = None
    if 'scarvesandcoffee.net' in url:
        restrictsearch = ('div', {'id': 'mainpage'})

    return get_urls_from_html(data, url, configuration, normalize,
                              restrictsearch)

Пример #2

Показать файл

    def __init__(self, sections, fileform, lightweight=False):
        """Register the config sections for a site and create the HTTP opener.

        Parameters:
            sections: sequence of section names.  The LAST item is the site
                domain name; any earlier items are registered as additional
                sections.
            fileform: output format name.  When truthy, the format section
                and ``<section>:<fileform>`` variants are registered too.
            lightweight: stored as ``self.lightweight``.
        """
        site = sections[-1]  # last section is site DN.
        ConfigParser.SafeConfigParser.__init__(self)

        self.lightweight = lightweight
        self.use_pagecache = False  # default to false for old adapters.

        self.linenos = dict()  # key by section or section,key -> lineno

        ## [injected] section has even less priority than [defaults]
        self.sectionslist = ['defaults', 'injected']

        ## add other sections (not including site DN) after defaults,
        ## but before site-specific.
        for section in sections[:-1]:
            self.addConfigSection(section)

        www_prefix = "www."
        if site.startswith(www_prefix):
            sitewith = site
            # Strip only the leading prefix.  str.replace() would also
            # remove any later "www." inside the domain name.
            sitewithout = site[len(www_prefix):]
        else:
            sitewith = www_prefix + site
            sitewithout = site

        # Register both the www. and the bare spelling of the site.
        self.addConfigSection(sitewith)
        self.addConfigSection(sitewithout)

        if fileform:
            self.addConfigSection(fileform)
            ## add other sections:fileform (not including site DN)
            ## after fileform, but before site-specific:fileform.
            for section in sections[:-1]:
                self.addConfigSection(section + ":" + fileform)
            self.addConfigSection(sitewith + ":" + fileform)
            self.addConfigSection(sitewithout + ":" + fileform)
        self.addConfigSection("overrides")

        self.listTypeEntries = get_valid_list_entries()

        self.validEntries = get_valid_entries()

        self.url_config_set = False

        self.override_sleep = None
        self.cookiejar = self.get_empty_cookiejar()
        # Opener shares this instance's cookie jar and adds GZipProcessor.
        self.opener = u2.build_opener(u2.HTTPCookieProcessor(self.cookiejar),
                                      GZipProcessor())

        self.pagecache = self.get_empty_pagecache()

Пример #3

Показать файл

def get_urls_from_page(url, configuration=None, normalize=False):
    """Download the page at ``url`` and extract URLs from its HTML.

    An adapter is looked up for ``url`` (with ``anyurl=True``) so the fetch
    goes through the adapter's own ``_fetchUrl``.  When no adapter is known
    for the site (``UnknownSite``), the page is fetched with a plain
    cookie-aware, gzip-aware opener instead.

    Parameters:
        url: address of the page to fetch.
        configuration: configuration object handed to the adapter lookup and
            to ``get_urls_from_html``.  When falsy, a default
            ``Configuration("test1.com", "EPUB")`` is created.
        normalize: passed through unchanged to ``get_urls_from_html``.

    Returns:
        Whatever ``get_urls_from_html`` returns for the downloaded data.
    """
    # Local import keeps this function self-contained with respect to the
    # module's import header.
    from contextlib import closing

    if not configuration:
        configuration = Configuration("test1.com", "EPUB")

    data = None
    adapter = None
    try:
        adapter = adapters.getAdapter(configuration, url, anyurl=True)

        # special stuff to log into archiveofourown.org, if possible.
        # Unlike most that show the links to 'adult' stories, but protect
        # them, AO3 doesn't even show them if not logged in.  Only works
        # with saved user/pass--not going to prompt for list.
        if 'archiveofourown.org' in url:
            if adapter.getConfig("username"):
                if adapter.getConfig("is_adult"):
                    if '?' in url:
                        addurl = "&view_adult=true"
                    else:
                        addurl = "?view_adult=true"
                else:
                    addurl = ""
                # just to get an authenticity_token.
                data = adapter._fetchUrl(url + addurl)
                # login the session.
                adapter.performLogin(url, data)
                # the list page itself is fetched with the logged in
                # session by the final _fetchUrl call below.

        # this way it uses User-Agent or other special settings.  Only AO3
        # is doing login.
        data = adapter._fetchUrl(url, usecache=False)
    except UnknownSite:
        # no adapter with anyurl=True, must be a random site.
        opener = u2.build_opener(u2.HTTPCookieProcessor(), GZipProcessor())
        # closing() releases the HTTP response even if read() raises;
        # previously the response object was never closed.
        with closing(opener.open(url)) as response:
            data = response.read()

    # kludge because I don't see it on enough sites to be worth generalizing yet.
    restrictsearch = None
    if 'scarvesandcoffee.net' in url:
        restrictsearch = ('div', {'id': 'mainpage'})

    return get_urls_from_html(data, url, configuration, normalize,
                              restrictsearch)

Пример #4

Показать файл

 def set_cookiejar(self, cj):
     """Swap in cookie jar ``cj`` and rebuild the opener around it.

     The header list of the current opener is carried over to the
     replacement opener.
     """
     self.cookiejar = cj
     # Grab the headers before the old opener is replaced.
     preserved_headers = self.opener.addheaders
     cookie_handler = u2.HTTPCookieProcessor(self.cookiejar)
     gzip_handler = GZipProcessor()
     rebuilt_opener = u2.build_opener(cookie_handler, gzip_handler)
     self.opener = rebuilt_opener
     self.opener.addheaders = preserved_headers

Пример #5

Показать файл

 def __init__(self):
     """Create an empty cookie jar and an opener that uses it."""
     self.cookiejar = self.get_empty_cookiejar()
     # The opener combines cookie handling for this jar with GZipProcessor.
     cookie_handler = urllib2.HTTPCookieProcessor(self.cookiejar)
     gzip_handler = GZipProcessor()
     self.opener = urllib2.build_opener(cookie_handler, gzip_handler)