Example #1
def resolve(base, reference, strict=True):
    """
    Resolve a reference URI against a base URI to form a target URI.

    Implements relative URI resolution according to RFC 3986 section 5.2.
    "Relative Resolution".

    Note that urllib.parse.urljoin does not work with non-standard schemes
    (like our pydata: scheme), hence this implementation.
    """
    if not is_uri(base):
        raise UriSyntaxError("base was not a valid URI: {0}".format(base))
    if not is_uri_reference(reference):
        raise UriSyntaxError(
            "reference was not a valid URI-reference: {0}".format(reference))

    b, ref = urlsplit(base), urlsplit(reference)

    scheme, authority, path, query, fragment = None, None, None, None, None

    if not strict and ref.scheme == b.scheme:
        ref = SplitResult("", *ref[1:])

    if ref.scheme:
        scheme = ref.scheme
        authority = ref.netloc
        path = _remove_dot_segments(ref.path)
        query = ref.query
    else:
        if ref.netloc:
            authority = ref.netloc
            path = _remove_dot_segments(ref.path)
            query = ref.query
        else:
            if ref.path == "":
                path = b.path
                if ref.query:
                    query = ref.query
                else:
                    query = b.query
            else:
                if ref.path.startswith("/"):
                    path = _remove_dot_segments(ref.path)
                else:
                    path = _remove_dot_segments(_merge(b, ref.path))
                query = ref.query
            authority = b.netloc
        scheme = b.scheme
    fragment = ref.fragment

    return recombine(SplitResult(scheme, authority, path, query, fragment))
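A quick illustration of the limitation the docstring refers to, assuming Python 3's urllib.parse: urljoin only resolves references for schemes it knows about, so a reference against a pydata: base comes back unchanged, which is exactly the case resolve() is meant to handle.

from urllib.parse import urljoin

print(urljoin("https://host/a/b", "c"))    # https://host/a/c
print(urljoin("pydata://host/a/b", "c"))   # just 'c' -- the unknown scheme is not resolved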
Example #2
def oembed(url):
    """
    Endpoint to support oEmbed (see https://oembed.com/). Example request::

        https://hasjob.co/api/1/oembed?url=https://hasjob.co/

    Required for services like embed.ly, which need a registered oEmbed API handler.
    """

    endpoint, view_args = endpoint_for(url)
    if endpoint not in embed_index_views:
        return jsonify({})

    board = Board.get(view_args.get('subdomain', 'www'))
    iframeid = 'hasjob-iframe-' + str(uuid4())

    parsed_url = urlsplit(url)
    embed_url = SplitResult(
        parsed_url.scheme,
        parsed_url.netloc,
        parsed_url.path,
        parsed_url.query + ('&' if parsed_url.query else '') +
        'embed=1&iframeid=' + iframeid,
        parsed_url.fragment,
    ).geturl()

    return jsonify({
        'provider_url': url_for('index', subdomain=None, _external=True),
        'provider_name': app.config['SITE_TITLE'],
        'thumbnail_width': 200,
        'thumbnail_height': 200,
        'thumbnail_url': url_for('static',
                                 filename='img/hasjob-logo-200x200.png',
                                 _external=True),
        'author_name': board.title if board else app.config['SITE_TITLE'],
        'author_url': (board.url_for(_external=True)
                       if board
                       else url_for('index', subdomain=None, _external=True)),
        'title': (' | '.join([board.title, board.caption])
                  if board
                  else app.config['SITE_TITLE']),
        'html': ('<iframe id="{iframeid}" src="{url}" '
                 'width="100%" height="724" frameborder="0" scrolling="no">'.format(
                     url=embed_url, iframeid=iframeid)),
        'version': '1.0',
        'type': 'rich',
    })

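The SplitResult reassembly above is essentially "append a query parameter while keeping every other component". A minimal standalone sketch of that pattern (the helper name and URLs are illustrative, not part of Hasjob):

from urllib.parse import SplitResult, urlsplit

def append_query(url, extra):
    # Append an already-encoded query string, preserving scheme, netloc,
    # path and fragment.
    p = urlsplit(url)
    query = p.query + ('&' if p.query else '') + extra
    return SplitResult(p.scheme, p.netloc, p.path, query, p.fragment).geturl()

print(append_query("https://hasjob.co/?type=full", "embed=1&iframeid=abc"))
# https://hasjob.co/?type=full&embed=1&iframeid=abc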
Example #3
def create_url(request,
               swap_scheme=False,
               swap_origin=False,
               downgrade=False,
               query_parameter_to_remove=u"redirection"):
    parsed = urlsplit(request.url)
    destination_netloc = parsed.netloc

    scheme = parsed.scheme
    if swap_scheme:
        scheme = u"http" if parsed.scheme == u"https" else u"https"
        hostname = parsed.netloc.split(u':')[0]
        port = request.server.config[u"ports"][scheme][0]
        destination_netloc = u":".join([hostname, str(port)])

    if downgrade:
        # These rely on some unintuitive cleverness due to WPT's test setup:
        # 'Upgrade-Insecure-Requests' does not upgrade the port number,
        # so we use URLs in the form `http://[domain]:[https-port]`,
        # which will be upgraded to `https://[domain]:[https-port]`.
        # If the upgrade fails, the load will fail, as we don't serve HTTP over
        # the secure port.
        if parsed.scheme == u"https":
            scheme = u"http"
        elif parsed.scheme == u"wss":
            scheme = u"ws"
        else:
            raise ValueError(u"Downgrade redirection: Invalid scheme '%s'" %
                             parsed.scheme)
        hostname = parsed.netloc.split(u':')[0]
        port = request.server.config[u"ports"][parsed.scheme][0]
        destination_netloc = u":".join([hostname, str(port)])

    if swap_origin:
        destination_netloc = __get_swapped_origin_netloc(destination_netloc)

    parsed_query = parse_qsl(parsed.query, keep_blank_values=True)
    parsed_query = [
        x for x in parsed_query if x[0] != query_parameter_to_remove
    ]

    destination_url = urlunsplit(
        SplitResult(scheme=scheme,
                    netloc=destination_netloc,
                    path=parsed.path,
                    query=urlencode(parsed_query),
                    fragment=None))

    return destination_url
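Outside of the wptserve request object, the same reassembly can be sketched with literal values (the host and ports below are made up):

from urllib.parse import SplitResult, parse_qsl, urlencode, urlsplit, urlunsplit

parsed = urlsplit("https://example.test:8443/page?redirection=1&x=1")
query = urlencode([kv for kv in parse_qsl(parsed.query, keep_blank_values=True)
                   if kv[0] != "redirection"])
print(urlunsplit(SplitResult("http", "example.test:8000", parsed.path, query, None)))
# http://example.test:8000/page?x=1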
Example #4
    def get_plane_uri(cls, observation_uri, product_id):
        """
        Initializes a Plane URI instance

        Arguments:
        observation_uri : the uri of the observation
        product_id : ID of the product
        """
        caom_util.type_check(observation_uri, ObservationURI, "observation_uri",
                             override=False)
        caom_util.type_check(product_id, str, "product_id", override=False)
        caom_util.validate_path_component(cls, "product_id", product_id)

        path = urlsplit(observation_uri.uri).path
        uri = SplitResult(ObservationURI._SCHEME, "", path + "/" +
                          product_id, "", "").geturl()
        return cls(uri)
Example #5
    def get_observation_uri(cls, collection, observation_id):
        """
        Initializes an Observation URI instance

        Arguments:
        collection : collection
        observation_id : ID of the observation
        """

        caom_util.type_check(collection, str, "collection", override=False)
        caom_util.type_check(observation_id, str, "observation_id", override=False)

        caom_util.validate_path_component(cls, "collection", collection)
        caom_util.validate_path_component(cls, "observation_id", observation_id)

        uri = SplitResult(ObservationURI._SCHEME, "", collection + "/" + observation_id,
                          "", "").geturl()
        return cls(uri)
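Both constructors reduce to building a scheme-plus-path URI with an empty netloc. A standalone sketch with made-up identifiers, assuming the scheme string is "caom":

from urllib.parse import SplitResult, urlsplit

obs_uri = SplitResult("caom", "", "SOMECOLLECTION/obs123", "", "").geturl()
print(obs_uri)  # caom:SOMECOLLECTION/obs123

# get_plane_uri() extends the observation path with the product ID:
path = urlsplit(obs_uri).path
print(SplitResult("caom", "", path + "/productA", "", "").geturl())
# caom:SOMECOLLECTION/obs123/productA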
Example #6
def ensure_scheme(url, default_scheme='http'):
    """Adds a scheme to a url if not present.

    Args:
        url (string): a url, assumed to start with netloc
        default_scheme (string): a scheme to be added

    Returns:
        string: URL with a scheme
    """
    parsed = urlsplit(url, scheme=default_scheme)
    if not parsed.netloc:
        parsed = SplitResult(scheme=parsed.scheme,
                             netloc=parsed.path,
                             path='',
                             query=parsed.query,
                             fragment=parsed.fragment)

    return urlunsplit(parsed)
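A quick check of the expected behaviour (assuming the urlsplit/urlunsplit/SplitResult imports from urllib.parse used above):

print(ensure_scheme("example.com/path?x=1"))      # http://example.com/path?x=1
print(ensure_scheme("https://example.com/path"))  # https://example.com/path
print(ensure_scheme("example.com", "ftp"))        # ftp://example.com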
Example #7
def preprocess_url(referrer, url):
    ''' Clean and filter URLs before scraping.
    '''
    if not url:
        return None

    fields = urlsplit(urljoin(
        referrer, url))._asdict()  # convert to absolute URLs and split
    fields['path'] = re.sub(r'/$', '', fields['path'])  # remove trailing /
    fields['fragment'] = ''  # remove targets within a page
    fields = SplitResult(**fields)

    # Scrape pages of the current domain only; "replytocom" and "comment"
    # skip reply-to-comment links on WordPress sites.
    if fields.netloc == domain and "replytocom" not in url and "comment" not in url:
        if fields.scheme == 'http':
            httpurl = cleanurl = fields.geturl()
            httpsurl = httpurl.replace('http:', 'https:', 1)
        else:
            httpsurl = cleanurl = fields.geturl()
            httpurl = httpsurl.replace('https:', 'http:', 1)
        if constrain in httpsurl or constrain in httpurl:
            return cleanurl

    return None
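The normalisation at the heart of the function, in isolation (the URLs are illustrative; domain and constrain are module-level settings not shown in this snippet):

import re
from urllib.parse import SplitResult, urljoin, urlsplit

fields = urlsplit(urljoin("https://example.org/blog/", "post/#comments"))._asdict()
fields['path'] = re.sub(r'/$', '', fields['path'])  # drop trailing slash
fields['fragment'] = ''                             # drop in-page target
print(SplitResult(**fields).geturl())  # https://example.org/blog/post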
Example #8
    def handle_distrib(self, url):
        """React to a file dispatch message."""
        url = urlsplit(url)
        dummy, filename = os.path.split(url.path)
        LOGGER.debug("filename = %s", filename)
        # TODO: Should not make any assumptions on filename formats, should
        # load a description of it from a config file instead.
        if filename.endswith(".hmf"):
            risestr, satellite = filename[:-4].split("_", 1)
            risetime = datetime.strptime(risestr, "%Y%m%d%H%M%S")
            pname = pass_name(risetime, satellite)
            satellite = satellite.replace("_", "-")
            if satellite in self._excluded_platforms:
                return None
            swath = self._received_passes.get(pname, {}).copy()
            swath.pop('satellite', None)
            swath["platform_name"] = satellite
            swath["start_time"] = risetime
            swath["type"] = "binary"
            swath["format"] = "HRPT"
            if satellite == "NOAA-15":
                swath["sensor"] = ("avhrr/3", "amsu-a", "amsu-b", "hirs/3")
            elif satellite in ["NOAA-18", "NOAA-19"]:
                swath["sensor"] = ("avhrr/3", "mhs", "amsu-a", "hirs/4")
            swath["data_processing_level"] = "0"

        elif filename.startswith("P042") or filename.startswith("P154"):
            pds = {}
            pds["format"] = filename[0]
            pds["apid1"] = filename[1:8]
            pds["apid2"] = filename[8:15]
            pds["apid3"] = filename[15:22]
            pds["time"] = datetime.strptime(filename[22:33], "%y%j%H%M%S")
            pds["nid"] = filename[33]
            pds["ufn"] = filename[34:36]
            pds["extension"] = filename[36:40]
            risetime = pds["time"]
            if pds["apid1"][:3] == "042":
                satellite = "EOS-Terra"
                pname = pass_name(risetime, 'TERRA')
            elif pds["apid1"][:3] == "154":
                satellite = "EOS-Aqua"
                pname = pass_name(risetime, 'AQUA')
            else:
                raise ValueError("Unrecognized satellite ID: " +
                                 pds["apid1"][:3])

            if not satellite or satellite in self._excluded_platforms:
                LOGGER.debug("Platform name %s is excluded...", str(satellite))
                return None

            swath = self._received_passes.get(pname, {}).copy()
            swath.pop('satellite', None)
            swath['platform_name'] = satellite
            swath['start_time'] = risetime
            instruments = {
                "0064": "modis",
                "0141": "ceres+y",
                "0157": "ceres-y",
                "0261": "amsu-a1",
                "0262": "amsu-a1",
                "0290": "amsu-a2",
                "0342": "hsb",
                "0402": "amsr-e",
                "0404": "airs",
                "0405": "airs",
                "0406": "airs",
                "0407": "airs",
                "0414": "airs",
                "0415": "airs",
                "0419": "airs",
                "0957": "gbad",
            }
            swath["sensor"] = instruments.get(pds["apid1"][3:],
                                              pds["apid1"][3:])
            swath["format"] = "PDS"
            swath["type"] = "binary"
            swath["data_processing_level"] = "0"

        # NPP/JPSS RDRs
        elif filename.startswith("R") and filename.endswith(".h5"):
            # Occasionally RT-STPS produces files with non-standard file
            # naming, lacking the 'RNSCA' field. We will try to deal with this
            # below (Adam - 2013-06-04):
            mda = {}
            mda["format"] = filename[0]
            file_ok = False
            for prefix in JPSS_INSTRUMENTS_FROM_FILENAMES:
                if filename.startswith(prefix):
                    mda["sensor"] = JPSS_INSTRUMENTS_FROM_FILENAMES[prefix]
                    start_time_items = filename.strip(prefix).split('_')[1:3]
                    end_time_item = filename.strip(prefix).split('_')[3]
                    satellite = JPSS_PLATFORM_NAME.get(
                        filename.strip(prefix).split('_')[0], None)
                    orbit = filename.strip(prefix).split('_')[4].strip('b')
                    file_ok = True
                    break

            if not file_ok:
                LOGGER.warning("Seems to be a NPP/JPSS RDR "
                               "file but name is not standard!")
                LOGGER.warning("filename = %s", filename)
                return None

            # satellite = "Suomi-NPP, NOAA-20, NOAA-21,..."
            if not satellite or satellite in self._excluded_platforms:
                LOGGER.debug("Platform name %s is excluded...", str(satellite))
                return None

            mda["start_time"] = \
                datetime.strptime(start_time_items[0] + start_time_items[1],
                                  "d%Y%m%dt%H%M%S%f")
            end_time = \
                datetime.strptime(start_time_items[0] + end_time_item,
                                  "d%Y%m%de%H%M%S%f")
            if mda["start_time"] > end_time:
                end_time += timedelta(days=1)
            mda["orbit"] = orbit

            # FIXME: swath start and end time is granule dependent.
            # Get the end time as well! - Adam 2013-06-03:
            start_time = mda["start_time"]
            pname = pass_name(start_time, SCISYS_NAMES.get(satellite))

            swath = self._received_passes.get(pname, {}).copy()
            swath.pop("satellite", None)
            swath["platform_name"] = satellite
            swath["start_time"] = start_time
            swath['end_time'] = end_time
            swath["sensor"] = mda["sensor"]
            swath["format"] = "RDR"
            swath["type"] = "HDF5"
            swath["data_processing_level"] = "0"

        # metop
        elif filename[4:12] == "_HRP_00_":
            # "AVHR": "avhrr",
            instruments = {
                "ASCA": "ascat",
                "AMSA": "amsu-a",
                "ATOV": "atovs",
                "AVHR": "avhrr/3",
                "GOME": "gome",
                "GRAS": "gras",
                "HIRS": "hirs/4",
                "IASI": "iasi",
                "MHSx": "mhs",
                "SEMx": "sem",
                "ADCS": "adcs",
                "SBUV": "sbuv",
                "HKTM": "vcdu34"
            }

            satellites = {"M02": "Metop-A", "M01": "Metop-B", "M03": "Metop-C"}

            satellite = satellites[filename[12:15]]
            risetime = datetime.strptime(filename[16:31], "%Y%m%d%H%M%SZ")
            falltime = datetime.strptime(filename[32:47], "%Y%m%d%H%M%SZ")

            pname = pass_name(risetime, satellite.upper())
            LOGGER.debug("pname= % s", str(pname))
            swath = self._received_passes.get(pname, {}).copy()
            swath.pop('satellite', None)
            swath["start_time"] = risetime
            swath["end_time"] = falltime
            swath["platform_name"] = satellite
            swath["sensor"] = instruments[filename[:4]]
            swath["format"] = "EPS"
            swath["type"] = "binary"
            swath["data_processing_level"] = "0"
        else:
            return None

        if url.scheme in ["", "file"]:
            scheme = "ssh"
            netloc = self._emitter
            uri = urlunsplit(
                SplitResult(scheme, netloc, url.path, url.query, url.fragment))
        elif url.scheme == "ftp":
            scheme = "ssh"
            netloc = url.hostname
            uri = urlunsplit(
                SplitResult(scheme, netloc, url.path, url.query, url.fragment))
        else:
            LOGGER.debug("url.scheme not expected: %s", url.scheme)
            return None

        swath["uid"] = os.path.split(url.path)[1]
        swath["uri"] = uri
        swath['variant'] = 'DR'
        return swath
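The SplitResult step at the end, in isolation: local and "file" URIs get the emitter host, "ftp" URIs keep their own host, and both are republished with an ssh scheme (the hostname and path below are illustrative):

from urllib.parse import SplitResult, urlsplit, urlunsplit

url = urlsplit("ftp://receiver.example.org/data/pass_20130604.hmf")
print(urlunsplit(SplitResult("ssh", url.hostname, url.path, url.query, url.fragment)))
# ssh://receiver.example.org/data/pass_20130604.hmf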
Example #9
        # default, via the "zulipdev.com" hostname.
        EXTERNAL_HOST = 'zulipdev.com:9991'
        # Serve the main dev realm at the literal name "localhost",
        # so it works out of the box even when not on the Internet.
        REALM_HOSTS = {
            'zulip': 'localhost:9991',
        }
else:
    EXTERNAL_HOST = external_host_env
    REALM_HOSTS = {
        'zulip': EXTERNAL_HOST,
    }

# TODO: Replace with scripts.lib.zulip_tools.deport when this no longer needs to
# be Python 2 compatible for zthumbor.
r = SplitResult("", EXTERNAL_HOST, "", "", "")
assert r.hostname is not None
EXTERNAL_HOST_WITHOUT_PORT = "[" + r.hostname + "]" if ":" in r.hostname else r.hostname

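# Quick illustration of the SplitResult trick above (not part of the settings):
# treating a bare "host:port" string as a netloc lets urllib.parse do the
# host/port splitting, including bracketed IPv6 literals, e.g.
#
#   >>> from urllib.parse import SplitResult
#   >>> SplitResult("", "zulipdev.com:9991", "", "", "").hostname
#   'zulipdev.com'
#   >>> SplitResult("", "[::1]:9991", "", "", "").hostname
#   '::1'
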
ALLOWED_HOSTS = ['*']

# Uncomment extra backends if you want to test with them.  Note that
# for Google and GitHub auth you'll need to do some pre-setup.
AUTHENTICATION_BACKENDS = (
    'zproject.backends.DevAuthBackend',
    'zproject.backends.EmailAuthBackend',
    'zproject.backends.GitHubAuthBackend',
    'zproject.backends.GoogleAuthBackend',
    'zproject.backends.SAMLAuthBackend',
    # 'zproject.backends.AzureADAuthBackend',
    'zproject.backends.GitLabAuthBackend',