Exemplo n.º 1
0
def get(guild, realm, fields=[]):
    """Fetch a guild's data from the remote API and return the decoded JSON.

    :param guild: guild name; percent-encoded before it is put in the URL
    :param realm: realm name; percent-encoded before it is put in the URL
    :param fields: extra field name(s) to request; entries that are not in
        VALID_FIELDS are silently dropped.  The default list is never
        mutated here, it is only read.
    :return: whatever ``json.load`` decodes from the HTTP response body
    """
    # NOTE(review): _wrap_arr presumably turns a lone value into a list --
    # confirm against its definition.
    wanted = [name for name in _wrap_arr(fields) if name in VALID_FIELDS]

    quoted_guild = urllib2.quote(guild)
    quoted_realm = urllib2.quote(realm)

    # GUILD_API is a %-template taking (realm, guild, comma-joined fields).
    endpoint = API_ROOT + GUILD_API % (quoted_realm, quoted_guild, ",".join(wanted))
    response = urllib2.urlopen(endpoint)
    return json.load(response)
Exemplo n.º 2
0
    def _normalize_url(self):
        """
            Percent-encode ``self.url`` in place.

            The URL is encoded to UTF-8 first, then quoted.  URL delimiters
            and the '%' sign are listed as safe, so existing escape
            sequences are not encoded a second time.
        """
        untouched = "%/:=&?~#+!$,;'@()*[]"
        raw_url = self.url.encode('utf-8')
        self.url = urllib2.quote(raw_url, safe=untouched)
def get_wiki_content(title):
    """Download the wiki article called *title* and return its content.

    *title* is expected to be a unicode string with plain spaces (no
    underscores, no URL escapes).  Returns the ``content`` attribute of the
    article object built by ``wikiapi``.
    """
    api = wikiapi.WikiApi()

    # Every run of whitespace becomes a single underscore; the result is
    # UTF-8 encoded and percent-escaped before it goes into the URL.
    slug = '_'.join(title.split()).encode('utf8')
    target = api.get_article_url(urllib2.quote(slug))

    # The request is sent with a browser-like User-agent header.
    fetcher = urllib2.build_opener()
    fetcher.addheaders = [('User-agent', 'Mozilla/5.0')]
    page = fetcher.open(target).read()

    return api.get_article(page).content
def get_wiki_content(title):
    """Return the content of the wiki article whose title is *title*.

    *title* should be unicode text using spaces (not underscores) and
    containing no URL escape sequences.  The page is fetched over HTTP and
    handed to ``wikiapi`` for parsing; the parsed article's ``content`` is
    returned.
    """
    client = wikiapi.WikiApi()

    # Build the URL form of the title: whitespace runs -> '_', then UTF-8,
    # then percent-escaping.
    underscored = '_'.join(title.split())
    escaped_title = urllib2.quote(underscored.encode('utf8'))
    page_url = client.get_article_url(escaped_title)

    # Send a browser-like User-agent header with the request.
    http = urllib2.build_opener()
    http.addheaders = [('User-agent', 'Mozilla/5.0')]
    raw_page = http.open(page_url).read()

    article = client.get_article(raw_page)
    return article.content
Exemplo n.º 5
0
    def handle404(self, reqorig, url, container, obj):
        """
        Re-request the missing file from the thumb host.

        ``reqorig`` has its host rewritten to ``self.thumbhost`` and the
        resulting URL is fetched with a custom opener.  When the upstream
        fetch raises an HTTPError, a webob error response is returned.

        NOTE(review): nothing follows the try/except in this excerpt, so a
        successful fetch falls through and returns None -- the rest of the
        method appears to be missing.  ``url``, ``container`` and ``obj`` are
        not used in the visible part.
        """
        # go to the thumb media store for unknown files
        reqorig.host = self.thumbhost

        # A custom opener is needed because upload rejects the default
        # urllib2 User-agent.
        opener = urllib2.build_opener()

        # Forward the caller's User-Agent (or our configured one) plus a few
        # selected headers to the scalers.
        agent = reqorig.headers.get('User-Agent')
        if agent is None:
            agent = self.user_agent
        forwarded = [('User-Agent', agent)]
        for name in ('X-Forwarded-For', 'X-Original-URI'):
            value = reqorig.headers.get(name)
            if value is not None:
                forwarded.append((name, value))
        opener.addheaders = forwarded

        # In theory nobody should hold a link to a file we don't have, but an
        # old link may still be around, so answer with a proper 404.
        try:
            # Split the URL, percent-encode only the path (leaving '%' and
            # '/' alone), and put it back together before fetching.
            parts = list(urlparse.urlsplit(reqorig.url))
            parts[2] = urllib2.quote(parts[2], '%/')
            upcopy = opener.open(urlparse.urlunsplit(parts))
        except urllib2.HTTPError as status:
            if status.code == 404:
                return webob.exc.HTTPNotFound('Expected original file not found')
            # Any other upstream error: reuse the NotFound body but report
            # the upstream status code.
            resp = webob.exc.HTTPNotFound('Unexpected error %s' % status)
            resp.status = status.code
            return resp
Exemplo n.º 6
0
    def _normalize_url(self):
        """
            Rewrite ``self.url`` as its percent-encoded form.

            The value is UTF-8 encoded and then quoted; URL punctuation and
            '%' are passed as safe characters, so they are left as they are.
        """
        keep_literal = "%/:=&?~#+!$,;'@()*[]"
        encoded = self.url.encode('utf-8')
        self.url = urllib2.quote(encoded, safe=keep_literal)
Exemplo n.º 7
0
class WMFRewrite(object):
    """
    Rewrite Media Store URLs so that swift knows how to deal.

    Mostly it's a question of inserting the AUTH_ string, and changing / to - in the container section.
    """
    def __init__(self, app, conf):
        """
        Store the wrapped app and read the settings from ``conf``.

        Every value read from ``conf`` is whitespace-stripped.  ``writethumb``
        is a flag: it is on whenever the key is present, whatever its value.
        ``shard_container_list`` is only set when ``shard_containers`` is
        ``'some'``.
        """
        self.app = app
        self.account = conf['account'].strip()
        self.authurl = conf['url'].strip()
        self.login = conf['login'].strip()
        self.key = conf['key'].strip()
        self.thumbhost = conf['thumbhost'].strip()
        self.writethumb = 'writethumb' in conf
        self.user_agent = conf['user_agent'].strip()
        self.bind_port = conf['bind_port'].strip()
        self.shard_containers = conf['shard_containers'].strip()  # all, some, none
        if self.shard_containers == 'some':
            # if we're supposed to shard some containers, get a cleaned list
            # of the containers to shard
            self.shard_container_list = [
                name.strip()
                for name in conf['shard_container_list'].split(',')]

        #self.logger = get_logger(conf)

    def handle404(self, reqorig, url, container, obj):
        """
        Return a webob.Response which fetches the thumbnail from the thumb
        host, potentially writes it out to Swift so we don't 404 next time,
        and returns it. Note also that the thumb host might write it out
        to Swift so we don't have to.

        If the upstream fetch raises an HTTPError, a webob HTTPNotFound is
        returned instead; its message says whether upstream answered 404 or
        something else.
        """
        # go to the thumb media store for unknown files
        reqorig.host = self.thumbhost
        # upload doesn't like our User-agent, otherwise we could call it
        # using urllib2.url()
        opener = urllib2.build_opener()
        opener.addheaders = [('User-agent', self.user_agent)]
        # At least in theory, we shouldn't be handing out links to originals
        # that we don't have (or in the case of thumbs, can't generate).
        # However, someone may have a formerly valid link to a file, so we
        # should do them the favor of giving them a 404.
        try:
            upcopy = opener.open(reqorig.url)
        except urllib2.HTTPError as status:
            # Compare the numeric status code: the exception object itself
            # never equals 404, so the "expected" branch used to be dead.
            if status.code == 404:
                return webob.exc.HTTPNotFound(
                    'Expected original file not found')
            return webob.exc.HTTPNotFound('Unexpected error %s' % status)

        # get the Content-Type.
        uinfo = upcopy.info()
        c_t = uinfo.gettype()
        # sometimes Last-Modified isn't present; use now() when that happens.
        # (getdate() then returns None and mktime() raises TypeError.)
        try:
            last_modified = time.mktime(uinfo.getdate('Last-Modified'))
        except TypeError:
            last_modified = time.mktime(time.localtime())

        if self.writethumb:
            # Fetch from upload, write into the cluster, and return it
            upcopy = Copy2(upcopy,
                           self.app,
                           url,
                           urllib2.quote(container),
                           obj,
                           self.authurl,
                           self.login,
                           self.key,
                           content_type=c_t,
                           modified=last_modified)

        resp = webob.Response(app_iter=upcopy, content_type=c_t)
        # Only relay Last-Modified when upstream actually sent it; otherwise
        # the header value would be None.
        upstream_last_modified = uinfo.getheader('Last-Modified')
        if upstream_last_modified is not None:
            resp.headers.add('Last-Modified', upstream_last_modified)
        return resp
Exemplo n.º 8
0
def quote(card_name):
    """
    Return ``card_name`` percent-encoded by ``urllib2.quote`` with its
    default settings.
    """
    escaped_name = urllib2.quote(card_name)
    return escaped_name
Exemplo n.º 9
0
    def handle404(self, reqorig, url, container, obj):
        """
        Return a webob.Response which fetches the thumbnail from the thumb
        host and returns it. Note also that the thumb host might write it out
        to Swift so it won't 404 next time.

        Two openers are built: one that proxies through ``self.thumbhost``
        and one without a proxy for Thumbor.  In the code shown here the
        request is actually sent with the Thumbor opener.  An upstream
        HTTPError is converted into a webob HTTPError with the same code,
        title, body and headers.

        NOTE(review): nothing follows the try/except in this excerpt, so the
        success path is not visible here.  ``url``, ``container`` and ``obj``
        are not used in the visible part.
        """
        # go to the thumb media store for unknown files
        reqorig.host = self.thumbhost
        # upload doesn't like our User-agent, otherwise we could call it
        # using urllib2.url()
        proxy_handler = urllib2.ProxyHandler({'http': self.thumbhost})
        redirect_handler = DumbRedirectHandler()

        opener = urllib2.build_opener(redirect_handler, proxy_handler)
        # Thumbor doesn't need (and doesn't like) the proxy
        thumbor_opener = urllib2.build_opener(redirect_handler)

        # Pass on certain headers from the caller squid to the scalers
        opener.addheaders = []
        if reqorig.headers.get('User-Agent') is not None:
            opener.addheaders.append(('User-Agent', reqorig.headers.get('User-Agent')))
        else:
            opener.addheaders.append(('User-Agent', self.user_agent))
        for header_to_pass in ['X-Forwarded-For', 'X-Forwarded-Proto',
                               'Accept', 'Accept-Encoding', 'X-Original-URI']:
            if reqorig.headers.get(header_to_pass) is not None:
                opener.addheaders.append((header_to_pass, reqorig.headers.get(header_to_pass)))

        # Both openers now share the SAME list object, so the X-Original-URI
        # entry appended to opener.addheaders further down is also sent by
        # thumbor_opener.
        thumbor_opener.addheaders = opener.addheaders

        self.logger.debug("Addheaders: %r" % thumbor_opener.addheaders)

        # At least in theory, we shouldn't be handing out links to originals
        # that we don't have (or in the case of thumbs, can't generate).
        # However, someone may have a formerly valid link to a file, so we
        # should do them the favor of giving them a 404.
        try:
            # break apart the url, url-encode it, and put it back together
            urlobj = list(urlparse.urlsplit(reqorig.url))
            # encode the URL but don't encode %s and /s
            urlobj[2] = urllib2.quote(urlobj[2], '%/')
            encodedurl = urlparse.urlunsplit(urlobj)

            # Thumbor never needs URL mangling and it needs a different host
            # NOTE(review): thumbor_encodedurl is only bound inside this
            # branch; if self.thumborhost is falsy, the open() call at the
            # end of the try block raises NameError -- confirm thumborhost is
            # always configured.
            if self.thumborhost:
                thumbor_reqorig = reqorig.copy()
                thumbor_reqorig.host = self.thumborhost
                thumbor_urlobj = list(urlparse.urlsplit(thumbor_reqorig.url))
                thumbor_urlobj[2] = urllib2.quote(thumbor_urlobj[2], '%/')
                thumbor_encodedurl = urlparse.urlunsplit(thumbor_urlobj)

            # if sitelang, we're supposed to mangle the URL so that
            # http://upload.wikimedia.org/wikipedia/commons/thumb/a/a2/Little_kitten_.jpg/330px-Little_kitten_.jpg
            # changes to http://commons.wikipedia.org/w/thumb_handler.php/a/a2/Little_kitten_.jpg/330px-Little_kitten_.jpg
            # NOTE(review): the mangled encodedurl is computed but not opened
            # in this excerpt; only thumbor_encodedurl is requested.
            if self.backend_url_format == 'sitelang':
                match = re.match(r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)', encodedurl)
                if match:
                    proj = match.group('proj')
                    lang = match.group('lang')
                    # and here are all the legacy special cases, imported from thumb_handler.php
                    if(proj == 'wikipedia'):
                        if(lang in ['meta', 'commons', 'internal', 'grants']):
                            proj = 'wikimedia'
                        if(lang in ['mediawiki']):
                            lang = 'www'
                            proj = 'mediawiki'
                    hostname = '%s.%s.org' % (lang, proj)
                    if(proj == 'wikipedia' and lang == 'sources'):
                        #yay special case
                        hostname = 'wikisource.org'
                    # ok, replace the URL with just the part starting with thumb/
                    # take off the first two parts of the path (eg /wikipedia/commons/); make sure the string starts with a /
                    encodedurl = 'http://%s/w/thumb_handler.php/%s' % (hostname, match.group('path'))
                    # add in the X-Original-URI with the swift got (minus the hostname)
                    opener.addheaders.append(('X-Original-URI', list(urlparse.urlsplit(reqorig.url))[2]))
                else:
                    # ASSERT this code should never be hit since only thumbs should call the 404 handler
                    self.logger.warn("non-thumb in 404 handler! encodedurl = %s" % encodedurl)
                    resp = webob.exc.HTTPNotFound('Unexpected error')
                    return resp
            else:
                # log the result of the match here to test and make sure it's sane before enabling the config
                match = re.match(r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)', encodedurl)
                if match:
                    proj = match.group('proj')
                    lang = match.group('lang')
                    self.logger.warn("sitelang match has proj %s lang %s encodedurl %s" % (proj, lang, encodedurl))
                else:
                    self.logger.warn("no sitelang match on encodedurl: %s" % encodedurl)

            # The request is sent through the proxy-less Thumbor opener.
            upcopy = thumbor_opener.open(thumbor_encodedurl)

        except urllib2.HTTPError, error:
            # copy the urllib2 HTTPError into a webob HTTPError class as-is

            # The class is defined per call so that its class attributes and
            # __init__ can capture this particular ``error``.
            class CopiedHTTPError(webob.exc.HTTPError):
                code = error.code
                title = error.msg

                def html_body(self, environ):
                    # Return the upstream body unchanged.
                    return self.detail

                def __init__(self):
                    super(CopiedHTTPError, self).__init__(
                        detail="".join(error.readlines()),
                        headers=error.hdrs.items())

            resp = CopiedHTTPError()
            return resp
Exemplo n.º 10
0
    def save_svg_and_png(self, kwargs):
        """ Save png out of the svg version of the chart

        ``kwargs`` is updated in place with the request form.  It supplies
        ``filename`` (default ``'img'``) and ``svg`` (the SVG markup).  The
        SVG is stored as ``<filename>.svg`` and the rendered image as
        ``<filename>.png`` inside ``self.context``.  A QR-code URL pointing
        at the chart tab is always placed in ``self.request.form['qr_url']``.

        Returns a translated status message: "Success", or an error text when
        the context is not folderish or the image could not be produced.
        """
        if not IFolderish.providedBy(self.context):
            return _("Can't save png chart on a non-folderish object !")
        form = getattr(self.request, 'form', {})
        kwargs.update(form)
        filename = kwargs.get('filename', 'img')
        chart_url = self.context.absolute_url() + "#" + "tab-" + filename
        svg_filename = filename + ".svg"
        filename += ".png"
        sp = self.siteProperties
        qr_size = sp.get('googlechart.qrcode_size', '70')
        object_ids = self.context.objectIds()
        # a configured size of '0' falls back to the default
        if qr_size == '0':
            qr_size = '70'
        qr_url = (
            u"http://chart.apis.google.com"
            "/chart?cht=qr&chld=H|0&chs=%sx%s&chl=%s" % (
                qr_size, qr_size, urllib2.quote(chart_url)))
        self.request.form['qr_url'] = qr_url
        svg_data = kwargs.get('svg', '')
        if not svg_data:
            # nothing to store
            return _("Success")
        new_svg = False
        if svg_filename not in object_ids:
            new_svg = True
            svg_filename = self.context.invokeFactory('File', id=svg_filename)
        svg_obj = self.context._getOb(svg_filename)
        svg_file_field = svg_obj.getField('file')
        svg_field_data = svg_file_field.getRaw(svg_obj).getIterator().read()
        if svg_field_data and svg_data == svg_field_data:
            return _("Success")
        elif svg_field_data:
            # 21894 svg_data from the form and the data saved within the current
            # svg files sometimes has the clipPath id number changed, otherwise
            # the files are identical in which case we no longer need to perform
            # any svg and image generation
            pattern = re.compile(r'_ABSTRACT_RENDERER_ID_\d+')
            # The submitted svg may not contain a renderer id at all; only
            # normalise and compare when there is one, instead of calling
            # .group() on None.
            pattern_match = pattern.search(svg_data)
            if pattern_match:
                svg_data_match = pattern_match.group()
                svg_field_data_matched = pattern.sub(svg_data_match,
                                                     svg_field_data)
                if svg_data == svg_field_data_matched:
                    return _("Success")
        # create image from the current svg
        img = super(SavePNGChart, self).__call__()
        if not img:
            return _("ERROR: An error occured while exporting your image. "
                     "Please try again later.")
        new_file = False
        if filename not in object_ids:
            new_file = True
            filename = self.context.invokeFactory('Image', id=filename)
        img_obj = self.context._getOb(filename)
        if new_file:
            img_obj.setExcludeFromNav(True)
        image_field = img_obj.getField('image')
        image_field.getMutator(img_obj)(img)
        if new_svg:
            svg_obj.setExcludeFromNav(True)
        svg_file_field.getMutator(svg_obj)(svg_data)

        wftool = getToolByName(svg_obj, "portal_workflow")
        state = wftool.getInfoFor(svg_obj, 'review_state', None)
        if state:
            if state != 'visible':
                workflows = wftool.getWorkflowsFor(svg_obj)
                workflow = workflows[0]
                transitions = workflow.transitions
                available_transitions = [transitions[i['id']] for i in
                                    wftool.getTransitionsFor(svg_obj)]

                # first take one transition that leads to 'published' ...
                to_do = [k for k in available_transitions
                         if k.new_state_id == 'published']

                self.request.form['_no_emails_'] = True
                for item in to_do:
                    workflow.doActionFor(svg_obj, item.id)
                    break
                # then make it public draft
                available_transitions = [transitions[i['id']] for i in
                                        wftool.getTransitionsFor(svg_obj)]

                to_do = [k for k in available_transitions
                         if k.new_state_id == 'visible']

                for item in to_do:
                    workflow.doActionFor(svg_obj, item.id)
                    break
                svg_obj.reindexObject()
        if not new_svg:
            # an existing svg was overwritten
            notify(InvalidateCacheEvent(svg_obj))
        return _("Success")
Exemplo n.º 11
0
 def title_to_article_url(title):
     """Return the article URL that ``wiki`` builds for *title*.

     Whitespace runs in *title* become single underscores; the result is
     UTF-8 encoded and percent-escaped before being passed on.
     ``wiki`` is taken from the enclosing scope, which is not shown here.
     """
     slug = '_'.join(title.split()).encode('utf8')
     escaped_slug = urllib2.quote(slug)
     return wiki.get_article_url(escaped_slug)
Exemplo n.º 12
0
 def title_to_article_url(title):
     """Turn a human-readable *title* into its article URL.

     Steps: collapse whitespace runs into '_', encode as UTF-8,
     percent-escape, then ask ``wiki`` (from the enclosing scope, not shown
     here) for the URL.
     """
     underscored = '_'.join(title.split())
     escaped = urllib2.quote(underscored.encode('utf8'))
     return wiki.get_article_url(escaped)
Exemplo n.º 13
0
    def get_card(name, redaction):
        """Look a card up on magiccards and build a card object from the page.

        The search starts from *name*.  The scraper then follows, in order:
        a site hint when the first page is not a card page, the English
        version link when the page is not in English, and the link for the
        requested *redaction* when the page shows a different one.

        :return: models.Card object, or None when no hint is available or
            the requested redaction cannot be found
        """
        scraper = MagiccardsScraper

        query = scraper.MAGICCARDS_QUERY_TMPL % urllib2.quote(name)
        page_url = scraper.MAGICCARDS_BASE_URL + query
        soup = BeautifulSoup(openurl(page_url))

        # if card was not found by name, try to use magiccards hints
        if not scraper._is_card_page(soup):
            hint = scraper._try_get_hint(name, soup)
            if hint is None:
                return None

            name = hint.text
            page_url = ext.url_join(ext.get_domain(page_url), hint['href'])
            soup = BeautifulSoup(openurl(page_url))

        # if card is found, but it's not english
        if not scraper._is_en(soup):
            # The English link is the second element that follows the
            # "English" flag image in the third cell of the fourth table.
            language_cell = soup.find_all('table')[3].find_all('td')[2]
            english_flag = language_cell.find('img', alt='English')
            en_link_tag = list(english_flag.next_elements)[1]

            name = en_link_tag.text
            page_url = ext.url_join(ext.get_domain(page_url), en_link_tag['href'])
            soup = BeautifulSoup(openurl(page_url))

        # if card redaction is wrong, try to get correct
        if not scraper._reda_is(redaction, soup):
            page_url = scraper._get_correct_reda(redaction, soup)
            if page_url is None:
                return None

            soup = BeautifulSoup(openurl(page_url))

        # Scrape the three sections first, then build the model objects.
        card_type = scraper._get_card_type(soup)
        info_fields = scraper._get_card_info(soup)
        price_fields = scraper._get_prices(soup)

        card_info = models.CardInfo(**info_fields)
        card_prices = models.CardPrices(**price_fields)

        return models.Card(ext.uni(name), ext.uni(redaction), card_type,
                           card_info, card_prices)
Exemplo n.º 14
0
    def save_svg_and_png(self, kwargs):
        """ Save png out of the svg version of the chart

        ``kwargs`` is updated in place with the request form.  It supplies
        ``filename`` (default ``'img'``) and ``svg`` (the SVG markup).  The
        SVG is stored as ``<filename>.svg`` and the rendered image as
        ``<filename>.png`` inside ``self.context``.  A QR-code URL pointing
        at the chart tab is always placed in ``self.request.form['qr_url']``.

        Returns a translated status message: "Success", or an error text when
        the context is not folderish or the image could not be produced.
        """
        if not IFolderish.providedBy(self.context):
            return _("Can't save png chart on a non-folderish object !")
        form = getattr(self.request, 'form', {})
        kwargs.update(form)
        filename = kwargs.get('filename', 'img')
        chart_url = self.context.absolute_url() + "#" + "tab-" + filename
        svg_filename = filename + ".svg"
        filename += ".png"
        sp = self.siteProperties
        qr_size = sp.get('googlechart.qrcode_size', '70')
        object_ids = self.context.objectIds()
        # a configured size of '0' falls back to the default
        if qr_size == '0':
            qr_size = '70'
        # "%7" is substituted into "H%sC0", which yields the literal text
        # "H%7C0" in the query string.
        qr_url = (u"https://chart.apis.google.com"
                  "/chart?cht=qr&chld=H%sC0&chs=%sx%s&chl=%s" %
                  ("%7", qr_size, qr_size, urllib2.quote(chart_url)))
        self.request.form['qr_url'] = qr_url
        svg_data = kwargs.get('svg', '')
        if not svg_data:
            # nothing to store
            return _("Success")
        new_svg = False
        if svg_filename not in object_ids:
            new_svg = True
            svg_filename = self.context.invokeFactory('File', id=svg_filename)
        svg_obj = self.context._getOb(svg_filename)
        svg_file_field = svg_obj.getField('file')
        svg_field_data = svg_file_field.getRaw(svg_obj).getIterator().read()
        if svg_field_data and svg_data == svg_field_data:
            return _("Success")
        elif svg_field_data:
            # 21894 svg_data from the form and the data saved within the current
            # svg files sometimes has the clipPath id number changed, otherwise
            # the files are identical in which case we no longer need to perform
            # any svg and image generation
            pattern = re.compile(r'_ABSTRACT_RENDERER_ID_\d+')
            # 79908 check if we have a result for pattern search
            pattern_match = pattern.search(svg_data)
            if pattern_match:
                svg_data_match = pattern_match.group()
                # rewrite every renderer id in the stored svg to the one from
                # the submitted svg before comparing
                svg_field_data_matched = pattern.sub(svg_data_match,
                                                     svg_field_data)
                if svg_data == svg_field_data_matched:
                    return _("Success")
        # create image from the current svg
        img = super(SavePNGChart, self).__call__()
        # 79908 check if img return has PNG within the string
        # as img can contain ERROR message in case of an error
        # which means the image will contain a string instead of actual
        # image data
        if not img or img and 'PNG' not in img:
            return _("ERROR: An error occured while exporting your image. "
                     "Please try again later.")
        new_file = False
        if filename not in object_ids:
            new_file = True
            filename = self.context.invokeFactory('Image', id=filename)
        img_obj = self.context._getOb(filename)
        if new_file:
            img_obj.setExcludeFromNav(True)
        image_field = img_obj.getField('image')
        image_field.getMutator(img_obj)(img)
        if new_svg:
            svg_obj.setExcludeFromNav(True)
        svg_file_field.getMutator(svg_obj)(svg_data)

        wftool = getToolByName(svg_obj, "portal_workflow")
        state = wftool.getInfoFor(svg_obj, 'review_state', None)
        if state:
            if state != 'visible':
                # only the first workflow of the object is used
                workflows = wftool.getWorkflowsFor(svg_obj)
                workflow = workflows[0]
                transitions = workflow.transitions

                # publish
                # (take the first available transition that leads to
                # 'published'; ids unknown to the workflow are skipped)
                for transition in wftool.getTransitionsFor(svg_obj):
                    tid = transition.get('id')
                    tob = transitions.get(tid)
                    if not tob:
                        continue

                    if tob.new_state_id != 'published':
                        continue

                    self.request.form['_no_emails_'] = True
                    workflow.doActionFor(svg_obj, tid)
                    break

                # then make it public draft
                # (the available transitions are queried again, after the
                # publish step above)
                for transition in wftool.getTransitionsFor(svg_obj):
                    tid = transition.get('id')
                    tob = transitions.get(tid)
                    if not tob:
                        continue

                    if tob.new_state_id != 'visible':
                        continue

                    workflow.doActionFor(svg_obj, tid)
                    break

                svg_obj.reindexObject()
        if not new_svg:
            # an existing svg was overwritten
            notify(InvalidateCacheEvent(svg_obj))
        return _("Success")
Exemplo n.º 15
0
    def handle404(self, reqorig, url, container, obj):
        """
        Return a swob.Response which fetches the thumbnail from the thumb
        host and returns it. Note also that the thumb host might write it out
        to Swift so it won't 404 next time.

        ``reqorig`` has its host rewritten to ``self.thumborhost``; its URL
        (with the path percent-encoded) is then requested.  On success the
        upstream body is streamed back together with a whitelist of upstream
        headers and a permissive CORS header.  An upstream HTTPError becomes
        a swob.HTTPException with the same status, message and headers; a
        URLError becomes swob.HTTPServiceUnavailable.

        ``url``, ``container`` and ``obj`` are not used by this method.
        """
        # upload doesn't like our User-agent, otherwise we could call it
        # using urllib2.url()
        thumbor_opener = urllib2.build_opener(DumbRedirectHandler())

        # Pass on certain headers from Varnish to Thumbor
        thumbor_opener.addheaders = []
        if reqorig.headers.get('User-Agent') is not None:
            thumbor_opener.addheaders.append(
                ('User-Agent', reqorig.headers.get('User-Agent')))
        else:
            thumbor_opener.addheaders.append(('User-Agent', self.user_agent))
        for header_to_pass in [
                'X-Forwarded-For', 'X-Forwarded-Proto', 'Accept',
                'Accept-Encoding', 'X-Original-URI'
        ]:
            if reqorig.headers.get(header_to_pass) is not None:
                header = (header_to_pass, reqorig.headers.get(header_to_pass))
                thumbor_opener.addheaders.append(header)

        # At least in theory, we shouldn't be handing out links to originals
        # that we don't have (or in the case of thumbs, can't generate).
        # However, someone may have a formerly valid link to a file, so we
        # should do them the favor of giving them a 404.
        try:
            reqorig.host = self.thumborhost
            # percent-encode only the path, leaving '%' and '/' untouched
            thumbor_urlobj = list(urlparse.urlsplit(reqorig.url))
            thumbor_urlobj[2] = urllib2.quote(thumbor_urlobj[2], '%/')
            thumbor_encodedurl = urlparse.urlunsplit(thumbor_urlobj)

            upcopy = thumbor_opener.open(thumbor_encodedurl)
        except urllib2.HTTPError as error:
            # Wrap the urllib2 HTTPError into a swob HTTPException
            status = error.code
            if status not in swob.RESPONSE_REASONS:
                # Generic status description in case of unknown status reasons.
                status = "%s Error" % status
            return swob.HTTPException(status=status,
                                      body=error.msg,
                                      headers=error.hdrs.items())
        except urllib2.URLError as error:
            msg = 'There was a problem while contacting the thumbnailing service: %s' % \
                  error.reason
            return swob.HTTPServiceUnavailable(msg)

        # get the Content-Type.
        uinfo = upcopy.info()
        c_t = uinfo.gettype()

        resp = swob.Response(app_iter=upcopy, content_type=c_t)

        headers_whitelist = [
            'Content-Length', 'Content-Disposition', 'Last-Modified',
            'Accept-Ranges', 'XKey', 'Thumbor-Engine', 'Server',
            'Nginx-Request-Date', 'Nginx-Response-Date',
            'Thumbor-Processing-Time', 'Thumbor-Processing-Utime',
            'Thumbor-Request-Id', 'Thumbor-Request-Date'
        ]

        # add in the headers if we've got them
        for header in headers_whitelist:
            value = uinfo.getheader(header)
            # getheader() gives None (not '') for a header that upstream did
            # not send, so test for a usable value rather than comparing with
            # the empty string; empty values are still skipped.
            if value:
                resp.headers[header] = value

        # also add CORS; see also our CORS middleware
        resp.headers['Access-Control-Allow-Origin'] = '*'

        return resp
Exemplo n.º 16
0
    def handle404(self, reqorig, url, container, obj):
        """
        Return a webob.Response which fetches the thumbnail from the thumb
        host and returns it. Note also that the thumb host might write it out
        to Swift so it won't 404 next time.
        """
        # go to the thumb media store for unknown files
        reqorig.host = self.thumbhost
        # upload doesn't like our User-agent, otherwise we could call it
        # using urllib2.url()
        proxy_handler = urllib2.ProxyHandler({'http': self.thumbhost})
        redirect_handler = DumbRedirectHandler()
        opener = urllib2.build_opener(redirect_handler, proxy_handler)
        # Thumbor doesn't need (and doesn't like) the proxy
        thumbor_opener = urllib2.build_opener(redirect_handler)

        # Pass on certain headers from the caller squid to the scalers
        opener.addheaders = []
        if reqorig.headers.get('User-Agent') is not None:
            opener.addheaders.append(('User-Agent', reqorig.headers.get('User-Agent')))
        else:
            opener.addheaders.append(('User-Agent', self.user_agent))
        for header_to_pass in ['X-Forwarded-For', 'X-Forwarded-Proto',
                               'Accept', 'Accept-Encoding', 'X-Original-URI']:
            if reqorig.headers.get(header_to_pass) is not None:
                opener.addheaders.append((header_to_pass, reqorig.headers.get(header_to_pass)))

        thumbor_opener.addheaders = opener.addheaders

        # At least in theory, we shouldn't be handing out links to originals
        # that we don't have (or in the case of thumbs, can't generate).
        # However, someone may have a formerly valid link to a file, so we
        # should do them the favor of giving them a 404.
        try:
            # break apach the url, url-encode it, and put it back together
            urlobj = list(urlparse.urlsplit(reqorig.url))
            # encode the URL but don't encode %s and /s
            urlobj[2] = urllib2.quote(urlobj[2], '%/')
            encodedurl = urlparse.urlunsplit(urlobj)

            # Thumbor never needs URL mangling and it needs a different host
            if self.thumborhost:
                thumbor_reqorig = reqorig.copy()
                thumbor_reqorig.host = self.thumborhost
                thumbor_urlobj = list(urlparse.urlsplit(thumbor_reqorig.url))
                thumbor_urlobj[2] = urllib2.quote(thumbor_urlobj[2], '%/')
                thumbor_encodedurl = urlparse.urlunsplit(thumbor_urlobj)

            # if sitelang, we're supposed to mangle the URL so that
            # http://upload.wm.o/wikipedia/commons/thumb/a/a2/Foo_.jpg/330px-Foo_.jpg
            # changes to
            # http://commons.wp.o/w/thumb_handler.php/a/a2/Foo_.jpg/330px-Foo_.jpg
            if self.backend_url_format == 'sitelang':
                match = re.match(
                    r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)',
                    encodedurl)
                if match:
                    proj = match.group('proj')
                    lang = match.group('lang')
                    # and here are all the legacy special cases, imported from thumb_handler.php
                    if(proj == 'wikipedia'):
                        if(lang in ['meta', 'commons', 'internal', 'grants']):
                            proj = 'wikimedia'
                        if(lang in ['mediawiki']):
                            lang = 'www'
                            proj = 'mediawiki'
                    hostname = '%s.%s.%s' % (lang, proj, self.tld)
                    if(proj == 'wikipedia' and lang == 'sources'):
                        # yay special case
                        hostname = 'wikisource.%s' % self.tld
                    # ok, replace the URL with just the part starting with thumb/
                    # take off the first two parts of the path
                    # (eg /wikipedia/commons/); make sure the string starts
                    # with a /
                    encodedurl = 'http://%s/w/thumb_handler.php/%s' % (
                        hostname, match.group('path'))
                    # add in the X-Original-URI with the swift got (minus the hostname)
                    opener.addheaders.append(
                        ('X-Original-URI', list(urlparse.urlsplit(reqorig.url))[2]))
                else:
                    # ASSERT this code should never be hit since only thumbs
                    # should call the 404 handler
                    self.logger.warn("non-thumb in 404 handler! encodedurl = %s" % encodedurl)
                    resp = webob.exc.HTTPNotFound('Unexpected error')
                    return resp
            else:
                # log the result of the match here to test and make sure it's
                # sane before enabling the config
                match = re.match(
                    r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)',
                    encodedurl)
                if match:
                    proj = match.group('proj')
                    lang = match.group('lang')
                    self.logger.warn(
                        "sitelang match has proj %s lang %s encodedurl %s" % (
                            proj, lang, encodedurl))
                else:
                    self.logger.warn("no sitelang match on encodedurl: %s" % encodedurl)

            # To turn thumbor off and have thumbnail traffic served by image scalers,
            # replace the line below with this one:
            # upcopy = opener.open(encodedurl)
            upcopy = thumbor_opener.open(thumbor_encodedurl)
        except urllib2.HTTPError, error:
            # copy the urllib2 HTTPError into a webob HTTPError class as-is

            class CopiedHTTPError(webob.exc.HTTPError):
                code = error.code
                title = error.msg

                def html_body(self, environ):
                    return self.detail

                def __init__(self):
                    super(CopiedHTTPError, self).__init__(
                        detail="".join(error.readlines()),
                        headers=error.hdrs.items())

            return CopiedHTTPError()
Exemplo n.º 17
0
 def thumborify_url(self, reqorig, host):
     """
     Return the URL of reqorig re-pointed at host, with its path quoted.

     Side effect: reqorig.host is overwritten with host before the URL is
     read back from the request. Only the path component is quoted; '%'
     and '/' characters already present in it are left as they are.
     """
     reqorig.host = host
     scheme, netloc, path, query, fragment = urlparse.urlsplit(reqorig.url)
     quoted_path = urllib2.quote(path, '%/')
     return urlparse.urlunsplit(
         (scheme, netloc, quoted_path, query, fragment))
Exemplo n.º 18
0
    def handle404(self, reqorig, url, container, obj):
        """
        Return a swob.Response which fetches the thumbnail from the thumb
        host and returns it. Note also that the thumb host might write it out
        to Swift so it won't 404 next time.

        The request is sent to Thumbor when ``self.thumborhost`` is set;
        otherwise it goes through the proxy opener to the (possibly
        sitelang-mangled) thumb host URL.

        :param reqorig: the incoming request. Its ``host`` is overwritten
                        with ``self.thumbhost`` as a side effect.
        :param url: not used by this method.
        :param container: not used by this method.
        :param obj: not used by this method.
        :returns: a ``swob.Response`` streaming the upstream body on
                  success; ``swob.HTTPNotFound`` when the sitelang format
                  is enabled and the URL is not a thumb URL; a
                  ``swob.HTTPException`` carrying the upstream status when
                  the upstream answers with an HTTP error;
                  ``swob.HTTPServiceUnavailable`` when the upstream cannot
                  be contacted.
        """
        # go to the thumb media store for unknown files
        reqorig.host = self.thumbhost
        # upload doesn't like our User-agent, otherwise we could call it
        # using urllib2.urlopen()
        proxy_handler = urllib2.ProxyHandler({'http': self.thumbhost})
        redirect_handler = DumbRedirectHandler()
        opener = urllib2.build_opener(redirect_handler, proxy_handler)
        # Thumbor doesn't need (and doesn't like) the proxy
        thumbor_opener = urllib2.build_opener(redirect_handler)

        # Pass on certain headers from the caller squid to the scalers
        opener.addheaders = []
        user_agent = reqorig.headers.get('User-Agent')
        if user_agent is None:
            user_agent = self.user_agent
        opener.addheaders.append(('User-Agent', user_agent))
        for header_to_pass in ('X-Forwarded-For', 'X-Forwarded-Proto',
                               'Accept', 'Accept-Encoding',
                               'X-Original-URI'):
            value = reqorig.headers.get(header_to_pass)
            if value is not None:
                opener.addheaders.append((header_to_pass, value))

        # NOTE: both openers share the very same list object, so a header
        # appended to opener.addheaders further down is also sent by
        # thumbor_opener.
        thumbor_opener.addheaders = opener.addheaders

        # break apart the url, url-encode it, and put it back together
        urlobj = list(urlparse.urlsplit(reqorig.url))
        # encode the URL but don't encode %s and /s
        urlobj[2] = urllib2.quote(urlobj[2], '%/')
        encodedurl = urlparse.urlunsplit(urlobj)

        # Thumbor never needs URL mangling and it needs a different host.
        # Stays None when no Thumbor host is configured, so that the open
        # call below can fall back instead of hitting an unbound local.
        thumbor_encodedurl = None
        if self.thumborhost:
            thumbor_reqorig = swob.Request(reqorig.environ.copy())
            thumbor_reqorig.host = self.thumborhost
            thumbor_urlobj = list(urlparse.urlsplit(thumbor_reqorig.url))
            thumbor_urlobj[2] = urllib2.quote(thumbor_urlobj[2], '%/')
            thumbor_encodedurl = urlparse.urlunsplit(thumbor_urlobj)

        # The same pattern serves both the sitelang rewrite and the
        # diagnostic logging done when sitelang is not enabled.
        match = re.match(
            r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)',
            encodedurl)

        # if sitelang, we're supposed to mangle the URL so that
        # http://upload.wm.o/wikipedia/commons/thumb/a/a2/Foo_.jpg/330px-Foo_.jpg
        # changes to
        # http://commons.wp.o/w/thumb_handler.php/a/a2/Foo_.jpg/330px-Foo_.jpg
        if self.backend_url_format == 'sitelang':
            if not match:
                # ASSERT this code should never be hit since only thumbs
                # should call the 404 handler
                self.logger.warn(
                    "non-thumb in 404 handler! encodedurl = %s" %
                    encodedurl)
                return swob.HTTPNotFound('Unexpected error')

            proj = match.group('proj')
            lang = match.group('lang')
            # and here are all the legacy special cases, imported from
            # thumb_handler.php
            if proj == 'wikipedia':
                if lang in ['meta', 'commons', 'internal', 'grants']:
                    proj = 'wikimedia'
                if lang in ['mediawiki']:
                    lang = 'www'
                    proj = 'mediawiki'
            hostname = '%s.%s.%s' % (lang, proj, self.tld)
            if proj == 'wikipedia' and lang == 'sources':
                # yay special case
                hostname = 'wikisource.%s' % self.tld
            # ok, replace the URL with just the part starting with thumb/
            # take off the first two parts of the path
            # (eg /wikipedia/commons/); make sure the string starts
            # with a /
            encodedurl = 'http://%s/w/thumb_handler.php/%s' % (
                hostname, match.group('path'))
            # add in the X-Original-URI with the path swift got (minus the
            # hostname)
            opener.addheaders.append(
                ('X-Original-URI', urlparse.urlsplit(reqorig.url)[2]))
        elif match:
            # log the result of the match here to test and make sure it's
            # sane before enabling the config
            self.logger.warn(
                "sitelang match has proj %s lang %s encodedurl %s" %
                (match.group('proj'), match.group('lang'), encodedurl))
        else:
            self.logger.warn("no sitelang match on encodedurl: %s" %
                             encodedurl)

        # At least in theory, we shouldn't be handing out links to originals
        # that we don't have (or in the case of thumbs, can't generate).
        # However, someone may have a formerly valid link to a file, so we
        # should do them the favor of giving them a 404.
        try:
            if thumbor_encodedurl is not None:
                upcopy = thumbor_opener.open(thumbor_encodedurl)
            else:
                # No Thumbor host configured: have the thumbnail traffic
                # served by the image scalers through the proxy opener.
                upcopy = opener.open(encodedurl)
        except urllib2.HTTPError as error:
            # Wrap the urllib2 HTTPError into a swob HTTPException
            status = error.code
            if status not in swob.RESPONSE_REASONS:
                # Generic status description in case of unknown status reasons.
                status = "%s Error" % status
            return swob.HTTPException(status=status,
                                      body=error.msg,
                                      headers=error.hdrs.items())
        except urllib2.URLError as error:
            msg = 'There was a problem while contacting the thumbnailing service: %s' % \
                  error.reason
            return swob.HTTPServiceUnavailable(msg)

        # get the Content-Type.
        uinfo = upcopy.info()
        c_t = uinfo.gettype()

        resp = swob.Response(app_iter=upcopy, content_type=c_t)

        headers_whitelist = [
            'Content-Length', 'Content-Disposition', 'Last-Modified',
            'Accept-Ranges', 'XKey', 'Thumbor-Engine', 'Server',
            'Nginx-Request-Date', 'Nginx-Response-Date',
            'Thumbor-Processing-Time', 'Thumbor-Processing-Utime',
            'Thumbor-Request-Id', 'Thumbor-Request-Date'
        ]

        # add in the headers if we've got them; getheader() yields None for
        # a header the upstream did not send, so test for a real value
        # rather than comparing against ''.
        for header in headers_whitelist:
            value = uinfo.getheader(header)
            if value:
                resp.headers[header] = value

        # also add CORS; see also our CORS middleware
        resp.headers['Access-Control-Allow-Origin'] = '*'

        return resp