Пример #1
0
    def _get_repo_info(repo_and_commit):
        """
        Query cgit for the namespace, username and email of the author.

        Each known namespace is tried in turn until cgit returns a populated commit page.

        :param tuple repo_and_commit: contains the repo and commit to query for
        :return: a dictionary that always contains the key commit. When the commit page is found,
            the key namespace is added, and when the "author" row of that page is found, the keys
            author_username and author_email are added as well.
        :rtype: dict
        """
        repo, commit = repo_and_commit
        log.debug(
            'Attempting to find the cgit URL for the commit "{0}" in repo "{1}"'
            .format(commit, repo))
        session = retry_session()
        rv = {'commit': commit}
        cgit_result = None
        # The tuple of namespaces to try when determining which namespace this git module belongs
        # to since this information isn't stored in GitBZ yet
        namespaces = ('rpms', 'containers', 'modules', 'tests')
        cgit_url = getenv('ESTUARY_CGIT_URL',
                          'http://pkgs.devel.redhat.com/cgit/')
        for namespace in namespaces:
            url = '{0}{1}/{2}/commit/?id={3}&dt=2'.format(
                cgit_url, namespace, repo, commit)
            log.debug('Trying the URL "{0}"'.format(url))
            try:
                cgit_result = session.get(url, timeout=15)
            except ConnectionError:
                log.error('The connection to "{0}" failed'.format(url))
                continue

            if cgit_result.status_code == 200:
                # If the repo is empty, cgit oddly returns a 200 status code, so let's correct the
                # status code so that the remainder of the code knows it's a bad request
                if 'Repository seems to be empty' in cgit_result.text:
                    cgit_result.status_code = 404
                else:
                    # If the repo is populated and a 200 status code is returned, then we can
                    # assume we found the correct repo
                    break

        # cgit_result is still None when every request failed to connect. Compare against None
        # explicitly rather than relying on the truthiness of the response object.
        if cgit_result is None or cgit_result.status_code != 200:
            log.error(
                'Couldn\'t find the commit "{0}" for the repo "{1}" in the namespaces: {2}'
                .format(commit, repo, ', '.join(namespaces)))
            return rv

        log.debug(
            'Found the cgit URL "{0}" for the commit "{1}" in repo "{2}"'.
            format(url, commit, repo))
        rv['namespace'] = namespace

        # Start parsing the cgit content
        soup = BeautifulSoup(cgit_result.text, 'html.parser')
        # Workaround for BS4 in EL7 since `soup.find('th', string=person)` doesn't work in
        # that environment
        for th_tag in soup.find_all('th'):
            # Compare with equality: `th_tag.string` is None for tags with nested markup, and a
            # membership test against the plain string 'author' would be a substring match.
            if th_tag.string != 'author':
                continue
            rv['author_username'], rv['author_email'] = \
                DistGitScraper._parse_username_email_from_cgit(th_tag, commit, namespace, repo)
            # The author row is the only one we're interested in, so stop looking
            break

        soup.decompose()
        return rv
Пример #2
0
    def query_api_and_update_neo4j(self):
        """
        Scrape the Freshmaker API and upload the data to Neo4j.

        Starting at ``self.freshmaker_url``, every page of events is fetched by following the
        ``next`` link in the page's ``meta`` section. Events whose ``search_key`` is not an
        integer are skipped. If the connection to Freshmaker fails, an error is logged and the
        scrape stops without raising.
        """
        # Initialize session and url
        session = retry_session()
        fm_url = self.freshmaker_url
        while True:
            log.debug('Querying {0}'.format(fm_url))
            try:
                rv_json = session.get(fm_url, timeout=60).json()
            except ConnectionError:
                # TODO: Remove this once FACTORY-3955 is resolved
                log.error(
                    'The connection to Freshmaker at %s failed. Skipping the rest of the scraper.',
                    fm_url,
                )
                break

            for fm_event in rv_json['items']:
                try:
                    int(fm_event['search_key'])
                except ValueError:
                    # Skip Freshmaker Events that don't have the search_key as the Advisory ID
                    continue
                log.debug('Creating FreshmakerEvent {0}'.format(fm_event['id']))
                event_params = dict(
                    id_=fm_event['id'],
                    event_type_id=fm_event['event_type_id'],
                    message_id=fm_event['message_id'],
                    state=fm_event['state'],
                    state_name=fm_event['state_name'],
                    state_reason=fm_event['state_reason'],
                    url=fm_event['url']
                )
                # The timestamps are optional, so only set the ones that are present
                if fm_event.get('time_created'):
                    event_params['time_created'] = timestamp_to_datetime(fm_event['time_created'])
                if fm_event.get('time_done'):
                    event_params['time_done'] = timestamp_to_datetime(fm_event['time_done'])
                event = FreshmakerEvent.create_or_update(event_params)[0]

                log.debug('Creating Advisory {0}'.format(fm_event['search_key']))
                advisory = Advisory.get_or_create(dict(
                    id_=fm_event['search_key']
                ))[0]

                event.conditional_connect(event.triggered_by_advisory, advisory)

                for build_dict in fm_event['builds']:
                    # To handle a faulty container build in Freshmaker
                    if build_dict['build_id'] and int(build_dict['build_id']) < 0:
                        continue
                    log.debug('Creating FreshmakerBuild {0}'.format(build_dict['build_id']))
                    fb_params = dict(
                        id_=build_dict['id'],
                        dep_on=build_dict['dep_on'],
                        name=build_dict['name'],
                        original_nvr=build_dict['original_nvr'],
                        rebuilt_nvr=build_dict['rebuilt_nvr'],
                        state=build_dict['state'],
                        state_name=build_dict['state_name'],
                        state_reason=build_dict['state_reason'],
                        time_submitted=timestamp_to_datetime(build_dict['time_submitted']),
                        type_=build_dict['type'],
                        type_name=build_dict['type_name'],
                        url=build_dict['url']
                    )
                    if build_dict['time_completed']:
                        fb_params['time_completed'] = timestamp_to_datetime(
                            build_dict['time_completed'])
                    if build_dict['build_id']:
                        fb_params['build_id'] = build_dict['build_id']
                    fb = FreshmakerBuild.create_or_update(fb_params)[0]
                    event.requested_builds.connect(fb)

                    # The build ID obtained from Freshmaker API is actually a Koji task ID
                    task_result = None
                    if build_dict['build_id']:
                        task_result = self.get_koji_task_result(build_dict['build_id'])

                    if not task_result:
                        continue

                    # Extract the build ID from a task result
                    xml_root = ET.fromstring(task_result)
                    # TODO: Change this if a task can trigger multiple builds
                    try:
                        build_id = xml_root.find(".//*[name='koji_builds'].//string").text
                    except AttributeError:
                        # `find` returned None, so the task result has no Koji build in it
                        build_id = None

                    if not build_id:
                        continue

                    log.debug('Creating ContainerKojiBuild {0}'.format(build_id))
                    build_params = {
                        'id_': build_id,
                        'original_nvr': build_dict['original_nvr']
                    }
                    try:
                        build = ContainerKojiBuild.create_or_update(build_params)[0]
                    except neomodel.exceptions.ConstraintValidationFailed:
                        # This must have errantly been created as a KojiBuild instead of a
                        # ContainerKojiBuild, so let's fix that.
                        build = KojiBuild.nodes.get_or_none(id_=build_id)
                        if not build:
                            # If there was a constraint validation failure and the build isn't just
                            # the wrong label, then we can't recover.
                            raise
                        build.add_label(ContainerKojiBuild.__label__)
                        build = ContainerKojiBuild.create_or_update(build_params)[0]

                    event.successful_koji_builds.connect(build)

            # Follow the pagination link until the API reports that there are no more pages
            if rv_json['meta'].get('next'):
                fm_url = rv_json['meta']['next']
            else:
                break
Пример #3
0
    def query_api_and_update_neo4j(self):
        """
        Page through the Freshmaker events API and store what is found in Neo4j.

        The scrape begins at ``self.freshmaker_url`` and follows the ``next`` link in each page's
        ``meta`` section until there is none. Events whose ``search_key`` is not an integer are
        skipped. For each remaining event, the event and its advisory are stored and linked, and
        any container build found through the event's Koji tasks is linked to the event.
        """
        http = retry_session()
        page_url = self.freshmaker_url
        while True:
            log.debug('Querying {0}'.format(page_url))
            page = http.get(page_url, timeout=15).json()

            for event_json in page['items']:
                advisory_id = event_json['search_key']
                try:
                    int(advisory_id)
                except ValueError:
                    # The search_key of this event isn't an Advisory ID, so ignore the event
                    continue

                event_node = FreshmakerEvent.create_or_update({
                    'id_': event_json['id'],
                    'event_type_id': event_json['event_type_id'],
                    'message_id': event_json['message_id'],
                    'state': event_json['state'],
                    'state_name': event_json['state_name'],
                    'state_reason': event_json['state_reason'],
                    'url': event_json['url'],
                })[0]
                advisory_node = Advisory.get_or_create({'id_': advisory_id})[0]
                event_node.conditional_connect(event_node.triggered_by_advisory, advisory_node)

                for build_json in event_json['builds']:
                    # What Freshmaker calls the build ID is really a Koji task ID. A missing or
                    # negative value marks a faulty container build in Freshmaker.
                    koji_task_id = build_json['build_id']
                    if not koji_task_id or int(koji_task_id) < 0:
                        continue

                    task_xml = self.get_koji_task_result(koji_task_id)
                    if not task_xml:
                        continue

                    # Pull the Koji build ID out of the task result
                    # TODO: Change this if a task can trigger multiple builds
                    string_node = ET.fromstring(task_xml).find(
                        ".//*[name='koji_builds'].//string")
                    koji_build_id = None if string_node is None else string_node.text
                    if not koji_build_id:
                        continue

                    container_build = ContainerKojiBuild.get_or_create({
                        'id_': koji_build_id,
                        'original_nvr': build_json['original_nvr'],
                    })[0]
                    event_node.triggered_container_builds.connect(container_build)

            # Stop once the API no longer advertises a following page
            next_url = page['meta'].get('next')
            if not next_url:
                break
            page_url = next_url