示例#1
0
    def _parse_username_email_from_cgit(th_tag, commit, namespace, repo):
        """
        Parse the username and email address from a cgit "th" element of author.

        :param th_tag: a BeautifulSoup4 element object
        :param str commit: the commit being processed
        :param str namespace: the namespace of the repo being processed
        :param str repo: the repo being processed
        :return: a tuple of (username, email)
        :rtype: tuple
        """
        person_text = th_tag.next_sibling.string
        # Set some defaults in the event the cgit entry is malformed
        username = None
        email = None

        if person_text:
            match = re.match(
                r'^.+<(?P<email>(?P<username>.+)@(?P<domain>.+))>$', person_text)
            if match:
                match_dict = match.groupdict()
                if match_dict['domain'].lower() == 'redhat.com':
                    username = match_dict['username'].lower()
                else:
                    # If the email isn't a Red Hat email address, then use the whole email address
                    # as the username. This should only happen with erroneous git configurations.
                    username = match_dict['email'].lower()
                email = match_dict['email'].lower()

        if username is None or email is None:
            log.error('Couldn\'t find the {0} for the commit "{1}" on repo "{2}/{3}"'.format(
                      th_tag.string, commit, namespace, repo))

        return username, email
示例#2
0
    def query_api_and_update_neo4j(self):
        """
        Scrape the Freshmaker API and upload the data to Neo4j.

        Pages through the Freshmaker events endpoint, creating or updating the
        FreshmakerEvent, Advisory, FreshmakerBuild, and ContainerKojiBuild
        nodes along with the relationships between them.
        """
        # Initialize session and url
        session = retry_session()
        fm_url = self.freshmaker_url
        while True:
            log.debug('Querying {0}'.format(fm_url))
            try:
                rv_json = session.get(fm_url, timeout=60).json()
            except ConnectionError:
                # TODO: Remove this once FACTORY-3955 is resolved
                log.error(
                    'The connection to Freshmaker at %s failed. Skipping the rest of the scraper.',
                    fm_url,
                )
                break

            for fm_event in rv_json['items']:
                try:
                    int(fm_event['search_key'])
                except ValueError:
                    # Skip Freshmaker Events that don't have the search_key as the Advisory ID
                    continue
                log.debug('Creating FreshmakerEvent {0}'.format(fm_event['id']))
                event_params = dict(
                    id_=fm_event['id'],
                    event_type_id=fm_event['event_type_id'],
                    message_id=fm_event['message_id'],
                    state=fm_event['state'],
                    state_name=fm_event['state_name'],
                    state_reason=fm_event['state_reason'],
                    url=fm_event['url']
                )
                if fm_event.get('time_created'):
                    event_params['time_created'] = timestamp_to_datetime(fm_event['time_created'])
                if fm_event.get('time_done'):
                    # Bug fix: previously this converted time_created, clobbering time_done
                    event_params['time_done'] = timestamp_to_datetime(fm_event['time_done'])
                event = FreshmakerEvent.create_or_update(event_params)[0]

                log.debug('Creating Advisory {0}'.format(fm_event['search_key']))
                advisory = Advisory.get_or_create(dict(
                    id_=fm_event['search_key']
                ))[0]

                event.conditional_connect(event.triggered_by_advisory, advisory)

                for build_dict in fm_event['builds']:
                    # To handle a faulty container build in Freshmaker
                    if build_dict['build_id'] and int(build_dict['build_id']) < 0:
                        continue
                    log.debug('Creating FreshmakerBuild {0}'.format(build_dict['build_id']))
                    fb_params = dict(
                        id_=build_dict['id'],
                        dep_on=build_dict['dep_on'],
                        name=build_dict['name'],
                        original_nvr=build_dict['original_nvr'],
                        rebuilt_nvr=build_dict['rebuilt_nvr'],
                        state=build_dict['state'],
                        state_name=build_dict['state_name'],
                        state_reason=build_dict['state_reason'],
                        time_submitted=timestamp_to_datetime(build_dict['time_submitted']),
                        type_=build_dict['type'],
                        type_name=build_dict['type_name'],
                        url=build_dict['url']
                    )
                    if build_dict['time_completed']:
                        fb_params['time_completed'] = timestamp_to_datetime(
                            build_dict['time_completed'])
                    if build_dict['build_id']:
                        fb_params['build_id'] = build_dict['build_id']
                    fb = FreshmakerBuild.create_or_update(fb_params)[0]
                    event.requested_builds.connect(fb)

                    # The build ID obtained from Freshmaker API is actually a Koji task ID
                    task_result = None
                    if build_dict['build_id']:
                        task_result = self.get_koji_task_result(build_dict['build_id'])

                    if not task_result:
                        continue

                    # Extract the build ID from a task result
                    xml_root = ET.fromstring(task_result)
                    # TODO: Change this if a task can trigger multiple builds
                    try:
                        build_id = xml_root.find(".//*[name='koji_builds'].//string").text
                    except AttributeError:
                        # The task result XML doesn't contain a koji_builds entry
                        build_id = None

                    if not build_id:
                        continue

                    log.debug('Creating ContainerKojiBuild {0}'.format(build_id))
                    build_params = {
                        'id_': build_id,
                        'original_nvr': build_dict['original_nvr']
                    }
                    try:
                        build = ContainerKojiBuild.create_or_update(build_params)[0]
                    except neomodel.exceptions.ConstraintValidationFailed:
                        # This must have errantly been created as a KojiBuild instead of a
                        # ContainerKojiBuild, so let's fix that.
                        build = KojiBuild.nodes.get_or_none(id_=build_id)
                        if not build:
                            # If there was a constraint validation failure and the build isn't just
                            # the wrong label, then we can't recover.
                            raise
                        build.add_label(ContainerKojiBuild.__label__)
                        build = ContainerKojiBuild.create_or_update(build_params)[0]

                    event.successful_koji_builds.connect(build)

            # Follow the API's pagination until there is no next page
            if rv_json['meta'].get('next'):
                fm_url = rv_json['meta']['next']
            else:
                break
示例#3
0
    def _get_repo_info(repo_and_commit):
        """
        Query cgit for the namespace, username and email of the author.

        :param tuple repo_and_commit: contains the repo and commit to query for
        :return: a JSON string of a dictionary with the keys namespace, author_username,
        author_email, and the commit
        :rtype: str
        """
        repo, commit = repo_and_commit
        log.debug(
            'Attempting to find the cgit URL for the commit "{0}" in repo "{1}"'
            .format(commit, repo))
        session = retry_session()
        rv = {'commit': commit}
        cgit_result = None
        # The tuple of namespaces to try when determining which namespace this git module belongs
        # to since this information isn't stored in GitBZ yet
        namespaces = ('rpms', 'containers', 'modules', 'tests')
        cgit_url = getenv('ESTUARY_CGIT_URL',
                          'http://pkgs.devel.redhat.com/cgit/')
        for namespace in namespaces:
            url = '{0}{1}/{2}/commit/?id={3}&dt=2'.format(
                cgit_url, namespace, repo, commit)
            log.debug('Trying the URL "{0}"'.format(url))
            try:
                cgit_result = session.get(url, timeout=15)
            except ConnectionError:
                log.error('The connection to "{0}" failed'.format(url))
                continue

            if cgit_result.status_code == 200:
                # If the repo is empty, cgit oddly returns a 200 status code, so let's correct the
                # status code so that the remainder of the code knows it's a bad request
                if 'Repository seems to be empty' in cgit_result.text:
                    cgit_result.status_code = 404
                else:
                    # If the repo is populated and a 200 status code is returned, then we can
                    # assume we found the correct repo
                    break

        if not cgit_result or cgit_result.status_code != 200:
            log.error(
                'Couldn\'t find the commit "{0}" for the repo "{1}" in the namespaces: {2}'
                .format(commit, repo, ', '.join(namespaces)))
            return rv

        log.debug(
            'Found the cgit URL "{0}" for the commit "{1}" in repo "{2}"'.
            format(url, commit, repo))
        rv['namespace'] = namespace

        # Start parsing the cgit content
        soup = BeautifulSoup(cgit_result.text, 'html.parser')
        # Workaround for BS4 in EL7 since `soup.find('th', string=person)` doesn't work in
        # that environment
        th_tags = soup.find_all('th')
        data_found = {'author': False}
        for th_tag in th_tags:
            # Bug fix: ('author') is just the string 'author', so the previous membership test
            # was a substring check (and raised TypeError when th_tag.string was None). A
            # one-element tuple makes this an exact match.
            if th_tag.string in ('author',):
                data_found[th_tag.string] = True
                username_key = '{0}_username'.format(th_tag.string)
                email_key = '{0}_email'.format(th_tag.string)
                rv[username_key], rv[
                    email_key] = DistGitScraper._parse_username_email_from_cgit(
                        th_tag, commit, namespace, repo)

            # If all the "th" elements we're interested in were parsed, then break from the loop
            # early
            if all(data_found.values()):
                break

        # Free the parse tree to reduce memory usage
        soup.decompose()
        return rv
示例#4
0
def _get_exception_users():
    """
    Get the list of users that are explicitly whitelisted.

    If the LDAP search fails, an empty set is returned.

    :return: a set of usernames
    :rtype: set
    :raise InternalServerError: if a required configuration value is not set or the connection to
        the LDAP server fails
    """
    # Import this here so it's not required for deployments with auth disabled
    import ldap3

    base_error = '%s is not set in the server configuration'
    ldap_uri = current_app.config.get('LDAP_URI')
    if not ldap_uri:
        log.error(base_error, 'LDAP_URI')
        raise InternalServerError()

    ldap_group_dn = current_app.config.get('LDAP_EXCEPTIONS_GROUP_DN')
    if not ldap_group_dn:
        log.error(base_error, 'LDAP_EXCEPTIONS_GROUP_DN')
        raise InternalServerError()

    if ldap_uri.startswith('ldaps://'):
        # An ldaps:// URI requires a CA certificate for server verification
        ca = current_app.config['LDAP_CA_CERTIFICATE']
        log.debug('Connecting to %s using SSL and the CA %s', ldap_uri, ca)
        tls = ldap3.Tls(ca_certs_file=ca, validate=ssl.CERT_REQUIRED)
        server = ldap3.Server(ldap_uri, use_ssl=True, tls=tls)
    else:
        log.debug('Connecting to %s without SSL', ldap_uri)
        server = ldap3.Server(ldap_uri)

    connection = ldap3.Connection(server)
    try:
        connection.open()
    except ldap3.core.exceptions.LDAPSocketOpenError:
        log.exception('The connection to %s failed', ldap_uri)
        raise InternalServerError()

    membership_attr = current_app.config['LDAP_GROUP_MEMBERSHIP_ATTRIBUTE']
    log.debug('Searching for the attribute %s on %s', ldap_group_dn,
              membership_attr)
    # Set the scope to base so only the group from LDAP_EXCEPTIONS_GROUP_DN is returned
    success = connection.search(ldap_group_dn,
                                '(cn=*)',
                                search_scope=ldap3.BASE,
                                attributes=[membership_attr])
    if not success:
        log.error(
            'The user exceptions list could not be determined because the search for the attribute '
            '%s on %s failed with %r',
            membership_attr,
            ldap_group_dn,
            connection.response,
        )
        return set()

    # Each member is a DN such as "uid=jdoe,ou=users,..."; the username is the value of the
    # first RDN.
    return {
        dn.split('=')[1].split(',')[0]
        for dn in connection.response[0]['attributes'][membership_attr]
    }