예제 #1
0
def url_fix(s, charset='utf-8'):
    r"""Repair a user-supplied URL in a way similar to what browsers do.

    Text typed by a user is often not a real URL because it contains unsafe
    characters such as ``' '``.  This helper turns backslashes into slashes,
    patches malformed Windows ``file://`` URLs and quotes the path, query
    string and fragment:

    >>> url_fix(u'http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)')
    'http://de.wikipedia.org/wiki/Elf%20(Begriffskl%C3%A4rung)'

    :param s:
        The string with the URL to fix.
    :param charset:
        The encoding handed to the quoting helpers.
    """
    # Characters left untouched when quoting the path / the query and fragment.
    path_safe = '/%+$!*\'(),'
    query_safe = ':&%=+$!*\'(),'

    # Backslashes are invalid in URLs anyway; like Chrome, treat them as
    # forward slashes.
    fixed = s.replace('\\', '/')

    # "file://c:/..." or "file://c|/..." looks like a malformed Windows URL:
    # insert the missing third slash in front of the drive letter.
    if fixed.startswith('file://'):
        drive, separator = fixed[7:8], fixed[8:10]
        if drive.isalpha() and separator in (':/', '|/'):
            fixed = 'file:///' + fixed[7:]

    parts = urlsplit(fixed)
    quoted_path = urlquote(parts.path, encoding=charset, safe=path_safe)
    quoted_query = urlquote_plus(
        parts.query, encoding=charset, safe=query_safe)
    quoted_fragment = urlquote_plus(
        parts.fragment, encoding=charset, safe=query_safe)

    components = (
        parts.scheme,
        parts.encode_netloc(),
        quoted_path,
        quoted_query,
        quoted_fragment,
    )
    return urlunsplit(components)
예제 #2
0
 def msg_format(self, content_type):
     """Render this query as a message fragment of ``content_type``.

     For subclasses of ``SlackMessageContent`` the fragment contains a
     ``<url|text>`` link to the ArXiv API query for ``self.search_query``;
     any other content type is delegated to the parent class.
     """
     if not issubclass(content_type, SlackMessageContent):
         return super(ArxivQuery, self).msg_format(content_type)

     template = 'ArXiv Query: <http://export.arxiv.org/api/query?search_query={}|{}>'
     # The URL part carries the quoted query, the label shows it verbatim.
     rendered = template.format(urlquote_plus(self.search_query),
                                self.search_query)
     return MessageFragment(content_type(rendered), content_type)
예제 #3
0
파일: urls.py 프로젝트: bwhmather/verktyg
def url_fix(s, charset='utf-8'):
    r"""Clean up a URL entered by a user, roughly the way a browser would.

    User input frequently is not a real URL because it contains unsafe
    characters such as ``' '``.  Backslashes are converted to slashes,
    malformed Windows ``file://`` URLs are repaired, and the path, query
    string and fragment are quoted:

    >>> url_fix(u'http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)')
    'http://de.wikipedia.org/wiki/Elf%20(Begriffskl%C3%A4rung)'

    :param s:
        The string with the URL to fix.
    :param charset:
        The encoding handed to the quoting helpers.
    """
    # Backslashes are not valid in URLs; converting them to slashes is
    # consistent with what Chrome does.
    candidate = s.replace('\\', '/')

    # A drive letter directly after "file://" (e.g. "file://c:/" or
    # "file://c|/") indicates a malformed Windows URL; add the third slash.
    has_file_scheme = candidate.startswith('file://')
    if has_file_scheme and candidate[7:8].isalpha():
        if candidate[8:10] in (':/', '|/'):
            candidate = 'file:///' + candidate[7:]

    split = urlsplit(candidate)
    encoded_netloc = _encode_netloc(split)

    # The query string and the fragment share one set of safe characters.
    query_safe = ':&%=+$!*\'(),'
    quoted_path = urlquote(split.path, encoding=charset, safe='/%+$!*\'(),')
    quoted_query = urlquote_plus(
        split.query, encoding=charset, safe=query_safe)
    quoted_fragment = urlquote_plus(
        split.fragment, encoding=charset, safe=query_safe)

    return urlunsplit((
        split.scheme,
        encoded_netloc,
        quoted_path,
        quoted_query,
        quoted_fragment,
    ))
예제 #4
0
    def getProjectId(self, sourcestamp):
        """Look up the GitLab project id for the sourcestamp's repository.

        Ids are cached in ``self.project_ids`` under the URL-quoted
        ``owner/repo`` name, so the API is only queried on a cache miss.
        Written as a generator: it yields the HTTP request and the JSON
        decoding of the response.

        :param sourcestamp: mapping whose ``'repository'`` entry holds the
            repository URL.
        :returns: the cached project id, or ``None`` if the URL cannot be
            parsed or the API does not answer with status 200.
        """
        parsed = giturlparse(sourcestamp['repository'])
        if parsed is None:
            return None

        # gitlab needs project name to be fully url quoted to get the project id
        quoted_name = urlquote_plus(f"{parsed.owner}/{parsed.repo}")

        # Cache hit: no request needed.
        if quoted_name in self.project_ids:
            return self.project_ids[quoted_name]

        response = yield self._http.get(f'/api/v4/projects/{quoted_name}')
        proj = yield response.json()
        if response.code != 200:
            log.msg('Unknown (or hidden) gitlab project'
                    f'{quoted_name}: {proj.get("message")}')
            return None

        self.project_ids[quoted_name] = proj['id']
        return self.project_ids[quoted_name]
예제 #5
0
파일: gitlab.py 프로젝트: ynezz/buildbot
    def getProjectId(self, sourcestamp):
        """Look up the GitLab project id for the sourcestamp's repository.

        Ids are cached in ``self.project_ids`` under the URL-quoted
        ``owner/repo`` name, so the API is only queried on a cache miss.
        Written as a generator: it yields the HTTP request and the JSON
        decoding of the response.

        :param sourcestamp: mapping whose ``'repository'`` entry holds the
            repository URL.
        :returns: the cached project id, or ``None`` if the URL cannot be
            parsed or the API does not answer with status 200.
        """
        # retrieve project id via cache
        url = giturlparse(sourcestamp['repository'])
        if url is None:
            return None
        project_full_name = "{}/{}".format(url.owner, url.repo)
        # gitlab needs project name to be fully url quoted to get the project id
        project_full_name = urlquote_plus(project_full_name)

        if project_full_name not in self.project_ids:
            response = yield self._http.get(
                '/api/v4/projects/{}'.format(project_full_name))
            proj = yield response.json()
            if response.code not in (200, ):
                # Fetch the message explicitly rather than splatting the whole
                # payload into format(): a body without a "message" key raised
                # KeyError, and one containing a "repo" key raised TypeError
                # (duplicate keyword argument), hiding the real failure.
                log.msg('Unknown (or hidden) gitlab project'
                        '{repo}: {message}'.format(
                            repo=project_full_name,
                            message=proj.get('message')))
                return None
            self.project_ids[project_full_name] = proj['id']

        return self.project_ids[project_full_name]
예제 #6
0
파일: gitlab.py 프로젝트: buildbot/buildbot
    def getProjectId(self, sourcestamp):
        """Look up the GitLab project id for the sourcestamp's repository.

        Ids are cached in ``self.project_ids`` under the URL-quoted
        ``owner/repo`` name, so the API is only queried on a cache miss.
        Written as a generator: it yields the HTTP request and the JSON
        decoding of the response.

        :param sourcestamp: mapping whose ``'repository'`` entry holds the
            repository URL.
        :returns: the cached project id, or ``None`` if the URL cannot be
            parsed or the API does not answer with status 200.
        """
        # retrieve project id via cache
        url = giturlparse(sourcestamp['repository'])
        if url is None:
            return None
        project_full_name = "%s/%s" % (url.owner, url.repo)

        # gitlab needs project name to be fully url quoted to get the project id
        project_full_name = urlquote_plus(project_full_name)

        if project_full_name not in self.project_ids:
            response = yield self._http.get('/api/v4/projects/%s' % (project_full_name))
            proj = yield response.json()
            if response.code not in (200, ):
                # Fetch the message explicitly rather than splatting the whole
                # payload into format(): a body without a "message" key raised
                # KeyError, and one containing a "repo" key raised TypeError
                # (duplicate keyword argument), hiding the real failure.
                log.msg(
                    'Unknown (or hidden) gitlab project'
                    '{repo}: {message}'.format(
                        repo=project_full_name,
                        message=proj.get('message')))
                return None
            self.project_ids[project_full_name] = proj['id']

        return self.project_ids[project_full_name]
예제 #7
0
    def create_changes(self, new_logentries):
        """Turn log-entry DOM elements into a list of change dictionaries.

        The paths of every entry are grouped by the branch reported by
        ``self._transform_path``.  One change dict is emitted per branch,
        unless the entry is nothing but the deletion of the branch itself.

        :param new_logentries: iterable of DOM elements, one per revision.
        :returns: list of change dicts in the order they were produced.
        """
        changes = []

        for entry in new_logentries:
            revision = text_type(entry.getAttribute("revision"))

            if self.revlinktmpl and revision:
                revlink = text_type(self.revlinktmpl % urlquote_plus(revision))
            else:
                revlink = u''

            log.msg("Adding change revision %s" % (revision, ))
            author = self._get_text(entry, "author")
            comments = self._get_text(entry, "msg")
            # The entry's "date" field is deliberately not read: it carries
            # localtime in the repository's timezone, while the buildmaster's
            # localtime is what matters (it positions the boxes on the
            # Waterfall display, etc).  addChange fills in the current time.
            try:
                pathlist = entry.getElementsByTagName("paths")[0]
            except IndexError:  # weird, we got an empty revision
                log.msg("ignoring commit with no paths")
                continue

            per_branch = {}
            for node in pathlist.getElementsByTagName("path"):
                kind = node.getAttribute("kind")
                action = node.getAttribute("action")
                path = u"".join(child.data for child in node.childNodes)
                if path.startswith("/"):
                    path = path[1:]
                if kind == "dir" and not path.endswith("/"):
                    path += "/"
                where = self._transform_path(path)

                # A falsy result means the file is outside any project that
                # we care about, so it is skipped.
                if not where:
                    continue

                branch = where.get("branch", None)
                filename = where["path"]
                info = per_branch.setdefault(
                    branch, {'files': [], 'number_of_directories': 0})

                if filename.endswith("/"):
                    # subdirectory of branch: stored without trailing slash
                    stored_name, is_directory = filename[:-1], True
                else:
                    # plain file, or "" for the root directory of the branch
                    stored_name, is_directory = filename, filename == ""
                info['files'].append(stored_name)
                if is_directory:
                    info['number_of_directories'] += 1

                # The first action seen for a branch is the one that is kept.
                info.setdefault('action', action)

                info.update({
                    key: where[key]
                    for key in ("repository", "project", "codebase")
                    if key in where
                })

            for branch, info in per_branch.items():
                files = info['files']

                # A single deleted directory that is the branch root itself.
                is_branch_deletion = (
                    info['action'] == u'D'
                    and info['number_of_directories'] == 1
                    and len(files) == 1
                    and files[0] == ''
                )
                if is_branch_deletion:
                    log.msg("Ignoring deletion of branch '%s'" % branch)
                    continue

                changes.append({
                    'author': author,
                    # weakly assume filenames are utf-8
                    'files': [
                        bytes2unicode(name, 'utf-8', 'replace')
                        for name in files
                    ],
                    'comments': comments,
                    'revision': revision,
                    'branch': util.bytes2unicode(branch),
                    'revlink': revlink,
                    'category': self.category,
                    'repository': util.bytes2unicode(
                        info.get('repository', self.repourl)),
                    'project': util.bytes2unicode(
                        info.get('project', self.project)),
                    'codebase': util.bytes2unicode(
                        info.get('codebase', None)),
                })

        return changes
예제 #8
0
    def create_changes(self, new_logentries):
        """Turn log-entry DOM elements into a list of change dictionaries.

        The paths of every entry are grouped by the branch reported by
        ``self._transform_path``.  One change dict is emitted per branch,
        unless the entry is nothing but the deletion of the branch itself.

        :param new_logentries: iterable of DOM elements, one per revision.
        :returns: list of change dicts in the order they were produced.
        """
        changes = []

        for entry in new_logentries:
            revision = str(entry.getAttribute("revision"))

            if self.revlinktmpl and revision:
                revlink = str(self.revlinktmpl % urlquote_plus(revision))
            else:
                revlink = ''

            log.msg("Adding change revision %s" % (revision,))
            author = self._get_text(entry, "author")
            comments = self._get_text(entry, "msg")
            # The entry's "date" field is deliberately not read: it carries
            # localtime in the repository's timezone, while the buildmaster's
            # localtime is what matters (it positions the boxes on the
            # Waterfall display, etc).  addChange fills in the current time.
            try:
                pathlist = entry.getElementsByTagName("paths")[0]
            except IndexError:  # weird, we got an empty revision
                log.msg("ignoring commit with no paths")
                continue

            per_branch = {}
            for node in pathlist.getElementsByTagName("path"):
                kind = node.getAttribute("kind")
                action = node.getAttribute("action")
                path = "".join(child.data for child in node.childNodes)
                if path.startswith("/"):
                    path = path[1:]
                if kind == "dir" and not path.endswith("/"):
                    path += "/"
                where = self._transform_path(path)

                # A falsy result means the file is outside any project that
                # we care about, so it is skipped.
                if not where:
                    continue

                branch = where.get("branch", None)
                filename = where["path"]
                info = per_branch.setdefault(
                    branch, {'files': [], 'number_of_directories': 0})

                if filename.endswith("/"):
                    # subdirectory of branch: stored without trailing slash
                    stored_name, is_directory = filename[:-1], True
                else:
                    # plain file, or "" for the root directory of the branch
                    stored_name, is_directory = filename, filename == ""
                info['files'].append(stored_name)
                if is_directory:
                    info['number_of_directories'] += 1

                # The first action seen for a branch is the one that is kept.
                info.setdefault('action', action)

                info.update({
                    key: where[key]
                    for key in ("repository", "project", "codebase")
                    if key in where
                })

            for branch, info in per_branch.items():
                files = info['files']

                # A single deleted directory that is the branch root itself.
                is_branch_deletion = (
                    info['action'] == 'D'
                    and info['number_of_directories'] == 1
                    and len(files) == 1
                    and files[0] == ''
                )
                if is_branch_deletion:
                    log.msg("Ignoring deletion of branch '%s'" % branch)
                    continue

                changes.append({
                    'author': author,
                    # weakly assume filenames are utf-8
                    'files': [
                        bytes2unicode(name, 'utf-8', 'replace')
                        for name in files
                    ],
                    'comments': comments,
                    'revision': revision,
                    'branch': util.bytes2unicode(branch),
                    'revlink': revlink,
                    'category': self.category,
                    'repository': util.bytes2unicode(
                        info.get('repository', self.repourl)),
                    'project': util.bytes2unicode(
                        info.get('project', self.project)),
                    'codebase': util.bytes2unicode(
                        info.get('codebase', None)),
                })

        return changes
예제 #9
0
 def test_urlquote_plus():
     """urlquote_plus: bytes and text input must quote to the same output"""
     quoted_from_bytes = urlquote_plus(b'abc/def ghi')
     quoted_from_text = urlquote_plus(u'abc/def ghi')
     assert quoted_from_bytes == quoted_from_text