Example #1
 def post(self, url, post_params={}):
     request = Request(url, urllib.urlencode(post_params), timeout=browser_url_open_timeout)
     print 'dacbrowser post call'
     #print 'post, type(request): ', type(request)
     #print 'dir(request): ', dir(request)
     #print 'request timeout: ', request.timeout
     #print 'dir(request.timeout): ', dir(request.timeout)
     request.add_header('User-agent', self.useragent) 
     return self.__open__(request)
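The same idea as a standalone sketch (the default timeout and User-Agent string are assumptions; in the original, browser_url_open_timeout and self.__open__ come from the surrounding class): mechanize's Request accepts a timeout keyword, and the POST body is simply the urlencoded parameter dict.

import urllib
from mechanize import Request, urlopen

def post(url, post_params=None, timeout=30.0):
    # urlencode the params; an empty dict yields an empty POST body
    data = urllib.urlencode(post_params or {})
    request = Request(url, data, timeout=timeout)
    request.add_header('User-agent', 'Mozilla/5.0')
    return urlopen(request)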
Example #2
def get_urls(br, tokens):
    from urllib import quote_plus
    from mechanize import Request
    from lxml import html
    escaped = [quote_plus(x.encode('utf-8')) for x in tokens if x and x.strip()]
    q = b'+'.join(escaped)
    url = 'http://bigbooksearch.com/books/'+q
    br.open(url).read()
    req = Request('http://bigbooksearch.com/query.php?SearchIndex=books&Keywords=%s&ItemPage=1'%q)
    req.add_header('X-Requested-With', 'XMLHttpRequest')
    req.add_header('Referer', url)
    raw = br.open(req).read()
    root = html.fromstring(raw.decode('utf-8'))
    urls = [i.get('src') for i in root.xpath('//img[@src]')]
    return urls
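A hypothetical call (the tokens are illustrative): any mechanize browser works here, though robots.txt handling should be disabled for this host.

from mechanize import Browser

br = Browser()
br.set_handle_robots(False)
urls = get_urls(br, [u'dune', u'frank herbert'])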
Example #3
    def _checkStoredInjections(self):
        for r in self.results:
            # At this stage injections in the Result obj are not
            # compacted yet, so this will only be the 1st injected param
            url, data = r.target.getPayloadedUrl(r.first_param, "")
            
            # In case of proxy 
            if self.engine.getOption('http-proxy') is not None:
                proxy = ProxyHandler({'http': self.engine.getOption('http-proxy')})
                opener = build_opener(proxy)
                install_opener(opener)
            
            # Some headers
            if self.engine.getOption('ua') is not None:
                if self.engine.getOption('ua') == "RANDOM":
                    headers = {'User-Agent': random.choice(USER_AGENTS)}
                else:
                    headers = {'User-Agent': self.engine.getOption('ua')}
            else:
                headers = {}
            if self.engine.getOption("cookie") is not None:
                headers["Cookie"] = self.engine.getOption("cookie")

            # Build the request
            req = Request(url, data, headers)
            try:
                to = 10 if self.engine.getOption('http-proxy') is None else 20
                response = urlopen(req, timeout=to)
            except HTTPError, e:
                self._addError(e.code, r.target.getAbsoluteUrl())
                continue 
            except URLError, e:
                self._addError(e.reason, r.target.getAbsoluteUrl())
                continue
Example #4
    def _performInjections(self, target):
        # Check every parameter 
        for k, v in target.params.iteritems():
            pl = Payload(taint=True)
            url, data = target.getPayloadedUrl(k, pl.payload)
            
            # In case of proxy 
            if self.engine.getOption('http-proxy') is not None:
                proxy = ProxyHandler({'http': self.engine.getOption('http-proxy')})
                opener = build_opener(proxy)
                install_opener(opener)
            # Some headers
            if self.engine.getOption('ua') is not None:
                if self.engine.getOption('ua') == "RANDOM":
                    headers = {'User-Agent': random.choice(USER_AGENTS)}
                else:
                    headers = {'User-Agent': self.engine.getOption('ua')}
            else:
                headers = {}
            if self.engine.getOption("cookie") is not None:
                headers["Cookie"] = self.engine.getOption("cookie")

            # Build the request
            req = Request(url, data, headers)
            try:
                to = 10 if self.engine.getOption('http-proxy') is None else 20
                response = urlopen(req, timeout=to)
            except HTTPError, e:
                self._addError(e.code, target.getAbsoluteUrl())
                return
            except URLError, e:
                self._addError(e.reason, target.getAbsoluteUrl())
                return
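The proxy-and-header boilerplate shared by Examples #3 and #4 can be distilled into one helper. A minimal sketch, assuming mechanize's urllib2-compatible API; proxy_url and ua are illustrative parameters, and the 10/20 second timeouts mirror the examples above.

from mechanize import (Request, urlopen, ProxyHandler, build_opener,
                       install_opener, HTTPError, URLError)

def fetch(url, data=None, proxy_url=None, ua='Mozilla/5.0'):
    if proxy_url is not None:
        # route plain-HTTP traffic through the proxy for all later urlopen calls
        install_opener(build_opener(ProxyHandler({'http': proxy_url})))
    req = Request(url, data, {'User-Agent': ua})
    try:
        # allow extra time when a proxy sits in between
        return urlopen(req, timeout=10 if proxy_url is None else 20)
    except (HTTPError, URLError):
        return None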
Example #5
    def request(self, method: str, path: str,
                data=None,
                headers=None):
        """Creates a request against the Remarkable Cloud API

        This function automatically fills in the blanks of base
        url & authentication.

        Args:
            method: The request method.
            path: complete url or path to request.
            data: raw data to put/post/...
            headers: a dict of additional headers to add to the request.
        Returns:
            A Response instance wrapping the response from the server.
        """

        if headers is None:
            headers = {}
        if not path.startswith("http"):
            if not path.startswith('/'):
                path = '/' + path
            url = f"{BASE_URL}{path}"
        else:
            url = path

        _headers = {
            "user-agent": USER_AGENT,
        }

        if self.token_set["usertoken"]:
            token = self.token_set["usertoken"]
            _headers["Authorization"] = f"Bearer {token}"
        _headers.update(headers)
        log.debug(url, _headers)

        # import logging
        # import sys
        # logger = logging.getLogger("mechanize")
        # logger.addHandler(logging.StreamHandler(sys.stdout))
        # logger.setLevel(logging.DEBUG)

        # self.browser.set_debug_http(True)
        # self.browser.set_debug_responses(True)
        # self.browser.set_debug_redirects(True)

        req = Request(url,
                      method=method,
                      data=data,
                      headers=_headers)

        resp = self.browser.open(req)

        return Response(resp)
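Hypothetical usage (client and the endpoint path are assumptions): a relative path is resolved against BASE_URL, and the stored user token, when present, is sent as a Bearer Authorization header.

resp = client.request("GET", "/some/endpoint",
                      headers={"accept": "application/json"})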
Example #6
 def markEpisode(self, episode):
     values = {"watched": "adding", "shid": episode}
     data = urllib.urlencode(values)
     req = Request(self._urlWacthed, " ")
     req.add_header(
         "User-Agent",
         "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2.7) Gecko/20100713 Firefox/3.6.7"
     )
     req.add_header("Referer", self._urlBase)
     req.add_data(data)
     self._cookieJar.add_cookie_header(req)
     res = urlopen(req)
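The same flow without the class context, as a sketch (the URL and episode id are made up): the request is created with a placeholder body, and the real urlencoded data is attached with add_data() before opening.

import urllib
from mechanize import Request, urlopen

data = urllib.urlencode({"watched": "adding", "shid": "123"})
req = Request("http://example.com/watched", " ")
req.add_data(data)  # replaces the placeholder body
res = urlopen(req)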
Example #8
    def _open_with_mechanize(self, url, data=None, referer=False):
        """Opens an internal request with the mechanize library.
        Since the request is internally dispatched, no open server
        port is required.

        :param url: A full qualified URL.
        :type url: string
        :param data: A dict with data which is posted using a `POST` request.
        :type data: dict
        :param referer: Sets the referer when set to ``True``.
        :type referer: Boolean (Default ``False``)
        """
        args = locals().copy()
        del args['self']
        preserved_request = getRequest()
        self.previous_request = ('_open_with_mechanize', args)
        self.previous_url = self.url

        if isinstance(url, Request):
            request = url
        else:
            data = self._prepare_post_data(data)
            request = Request(url, data)

        referer_url = ' '
        if referer:
            if referer is True and self.url:
                referer_url = self.url
            elif isinstance(referer, (str, unicode)):
                referer_url = referer
        request.add_header('REFERER', referer_url)
        request.add_header('HTTP_REFERER', referer_url)

        try:
            self.response = self.get_mechbrowser().open(request)
        except:
            self.response = None
            raise
        self.parse(self.response)
        self.previous_request_library = LIB_MECHANIZE
        setRequest(preserved_request)
Example #9
def search(query, max_results=10, timeout=60):
    url = 'http://woblink.com/publication/ajax?mode=none&query=' + urllib.quote_plus(query.encode('utf-8'))
    if max_results > 10:
        if max_results > 20:
            url += '&limit=30'
        else:
            url += '&limit=20'
    br = browser(user_agent='CalibreCrawler/1.0')
    br.set_handle_gzip(True)
    rq = Request(url, headers={
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'X-Requested-With': 'XMLHttpRequest',
        'Referer':'http://woblink.com/ebooki-kategorie',
        'Cache-Control':'max-age=0',
    }, data=urllib.urlencode({
        'nw_filtry_filtr_zakrescen_formularz[min]':'0',
        'nw_filtry_filtr_zakrescen_formularz[max]':'350',
    }))
    r = br.open(rq)
    raw = r.read()
    doc = html.fromstring('<html><body>' + raw.decode('utf-8') + '</body></html>')
    counter = max_results

    for data in doc.xpath('//div[@class="nw_katalog_lista_ksiazka ebook " or @class="nw_katalog_lista_ksiazka ebook promocja"]'):
        if counter <= 0:
            break

        id = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/@href'))
        if not id:
            continue

        cover_url = ''.join(data.xpath('.//div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]/a[1]/img/@src'))
        title = ''.join(data.xpath('.//h3[@class="nw_katalog_lista_ksiazka_detale_tytul"]/a[1]/text()'))
        author = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_autor"]/a/text()'))
        price = ''.join(data.xpath('.//div[@class="nw_opcjezakupu_cena"]/span[2]/text()'))
        formats = ', '.join(data.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_format"]/span/text()'))

        s = SearchResult()
        s.cover_url = cover_url
        s.title = title.strip()
        s.author = author.strip()
        s.price = price + ' zł'
        s.detail_item = id.strip()
        s.formats = formats

        counter -= 1
        s.drm = SearchResult.DRM_LOCKED if 'DRM' in formats else SearchResult.DRM_UNLOCKED
        yield s
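A hypothetical driver (the query is illustrative): search() is a generator, so results stream out as the AJAX page is parsed.

for result in search(u'mickiewicz', max_results=5):
    print result.title, result.price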
Example #10
 def remote_run(self, name, m, *args):
     from mechanize import HTTPError, Request
     from calibre.utils.serialize import msgpack_loads, msgpack_dumps
     url = self.url + '/cdb/cmd/{}/{}'.format(name, getattr(m, 'version', 0))
     if self.library_id:
         url += '?' + urlencode({'library_id':self.library_id})
     rq = Request(url, data=msgpack_dumps(args),
                  headers={'Accept': MSGPACK_MIME, 'Content-Type': MSGPACK_MIME})
     try:
         res = self.br.open_novisit(rq, timeout=self.timeout)
         ans = msgpack_loads(res.read())
     except HTTPError as err:
         self.interpret_http_error(err)
         raise
     if 'err' in ans:
         if ans['tb']:
             prints(ans['tb'])
         raise SystemExit(ans['err'])
     return ans['result']
Example #11
def get_urls(br, tokens):
    from urllib import quote_plus
    from mechanize import Request
    from lxml import html
    escaped = [quote_plus(x.encode('utf-8')) for x in tokens if x and x.strip()]
    q = b'+'.join(escaped)
    url = 'http://bigbooksearch.com/books/'+q
    br.open(url).read()
    req = Request('http://bigbooksearch.com/query.php?SearchIndex=books&Keywords=%s&ItemPage=1'%q)
    req.add_header('X-Requested-With', 'XMLHttpRequest')
    req.add_header('Referer', url)
    raw = br.open(req).read()
    root = html.fromstring(raw.decode('utf-8'))
    urls = [i.get('src') for i in root.xpath('//img[@src]')]
    return urls
Example #12
 def download(url, serialize=Constants.picture_serialization):
     """
         Download the given url and serialize depending on the parameter
     """
     try:
         if url is None or not isinstance(url, six.string_types):
             print("No url was given to download")
             return (False, "No url was given to download", url)
         target = Constants.get_output_for_url(url)
         if verbose:
             print(u"[Process: {}] - Downl. url {} - {}"
                 .format(multiprocessing.current_process(), url, target))
         req = Request(url)
         web_file = urlopen(req)
         if serialize:
             with open(target, "wb") as handle:
                 handle.write(web_file.read())
         else:
             # For performance measurements
             web_file.read()
         return (True, None, url)
     except Exception as e:
         print(e)
         return (False, e, url)
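Hypothetical call (the URL is illustrative): download() returns a (success, error, url) triple instead of raising.

ok, err, url = download(u'http://example.com/picture.jpg')
if not ok:
    print(err)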
Example #13
def get_basic_data(browser, log, *skus):
    from urllib import urlencode
    from calibre.utils.date import parse_only_date
    from mechanize import Request
    zeroes = ','.join('0' for sku in skus)
    data = {
        'skus': ','.join(skus),
        'drc': zeroes,
        'startPosition': '0',
        'sequence': '1',
        'selected': zeroes,
        'itemID': '0',
        'orderID': '0',
        'mailingID': '',
        'tContentWidth': '926',
        'originalOrder': ','.join(str(i) for i in range(len(skus))),
        'selectedOrderID': '0',
        'selectedSortColumn': '0',
        'listType': '1',
        'resultType': '32',
        'blockView': '1',
    }
    items_data_url = 'https://www.edelweiss.plus/GetTreelineControl.aspx?controlName=/uc/listviews/ListView_Title_Multi.ascx'
    req = Request(items_data_url, urlencode(data))
    response = browser.open_novisit(req)
    raw = response.read()
    root = parse_html(raw)
    for item in root.xpath('//div[@data-priority]'):
        row = item.getparent().getparent()
        sku = item.get('id').split('-')[-1]
        isbns = [
            x.strip() for x in row.xpath(
                'descendant::*[contains(@class, "pev_sku")]/text()')[0].split(
                    ',') if check_isbn(x.strip())
        ]
        isbns.sort(key=len, reverse=True)
        try:
            tags = [
                x.strip() for x in astext(
                    row.xpath(
                        'descendant::*[contains(@class, "pev_categories")]')
                    [0]).split('/')
            ]
        except IndexError:
            tags = []
        rating = 0
        for bar in row.xpath(
                'descendant::*[contains(@class, "bgdColorCommunity")]/@style'):
            m = re.search(r'width: (\d+)px;.*max-width: (\d+)px', bar)
            if m is not None:
                rating = float(m.group(1)) / float(m.group(2))
                break
        try:
            pubdate = parse_only_date(astext(
                row.xpath('descendant::*[contains(@class, "pev_shipDate")]')
                [0]).split(':')[-1].split(u'\xa0')[-1].strip(),
                                      assume_utc=True)
        except Exception:
            log.exception('Error parsing published date')
            pubdate = None
        authors = []
        for x in [
                x.strip() for x in row.xpath(
                    'descendant::*[contains(@class, "pev_contributor")]/@title'
                )
        ]:
            authors.extend(a.strip() for a in x.split(','))
        entry = {
            'sku': sku,
            'cover': row.xpath('descendant::img/@src')[0].split('?')[0],
            'publisher': astext(row.xpath(
                'descendant::*[contains(@class, "headerPublisher")]')[0]),
            'title': astext(row.xpath(
                'descendant::*[@id="title_{}"]'.format(sku))[0]),
            'authors': authors,
            'isbns': isbns,
            'tags': tags,
            'pubdate': pubdate,
            'format': ' '.join(row.xpath(
                'descendant::*[contains(@class, "pev_format")]/text()')).strip(),
            'rating': rating,
        }
        if entry['cover'].startswith('/'):
            entry['cover'] = None
        yield entry
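A hypothetical driver (the SKU is made up; browser() and default_log are the calibre helpers assumed by the function's signature): get_basic_data is a generator yielding one metadata dict per SKU row.

from calibre import browser
from calibre.utils.logging import default_log as log

for entry in get_basic_data(browser(), log, '12345678'):
    print entry['title'], entry['isbns']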
Example #14
import sys
from mechanize import ParseResponse, urlopen, Request, Browser

if len(sys.argv) == 1:
    uri = "http://imdb.com"
else:
    uri = sys.argv[1]

req = Request(uri)
req.set_proxy("208.232.182.74:80", "http")
response = urlopen(req)
forms = ParseResponse(response, backwards_compat=False)
form = forms[0]
form["q"] = "fatih akin"
br = Browser()
br.open(form.click())

print br.title()

for link in br.links():
    if str(link.text).count('Fatih') > 0:
        print link.absolute_url + ":" + link.text
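An equivalent Browser-centric sketch (it reuses the proxy address above; whether that proxy is still reachable is an assumption): mechanize.Browser can set the proxy, select the first form and submit it directly, instead of going through ParseResponse and form.click().

from mechanize import Browser

br = Browser()
br.set_proxies({"http": "208.232.182.74:80"})
br.open("http://imdb.com")
br.select_form(nr=0)  # the same form as forms[0] above
br["q"] = "fatih akin"
br.submit()
print br.title()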