def query_api_and_update_neo4j(self):
    """
    Scrape the Freshmaker API and upload the data to Neo4j.

    Starting at ``self.freshmaker_url``, every page of events is fetched by following
    ``meta.next`` until no further page is advertised. For each event whose ``search_key``
    is an integer (the advisory ID), a FreshmakerEvent, its Advisory, its FreshmakerBuilds
    and, when the Koji task result exposes one, the resulting ContainerKojiBuild are
    created or updated and connected.

    If the connection to Freshmaker fails, an error is logged and scraping stops.
    """
    # Initialize session and url
    session = retry_session()
    fm_url = self.freshmaker_url
    while True:
        log.debug('Querying {0}'.format(fm_url))
        try:
            rv_json = session.get(fm_url, timeout=60).json()
        except ConnectionError:
            # TODO: Remove this once FACTORY-3955 is resolved
            log.error(
                'The connection to Freshmaker at %s failed. Skipping the rest of the scraper.',
                fm_url,
            )
            break

        for fm_event in rv_json['items']:
            try:
                int(fm_event['search_key'])
            except ValueError:
                # Skip Freshmaker Events that don't have the search_key as the Advisory ID
                continue

            log.debug('Creating FreshmakerEvent {0}'.format(fm_event['id']))
            event_params = dict(
                id_=fm_event['id'],
                event_type_id=fm_event['event_type_id'],
                message_id=fm_event['message_id'],
                state=fm_event['state'],
                state_name=fm_event['state_name'],
                state_reason=fm_event['state_reason'],
                url=fm_event['url']
            )
            if fm_event.get('time_created'):
                event_params['time_created'] = timestamp_to_datetime(fm_event['time_created'])
            if fm_event.get('time_done'):
                # Use the event's own completion timestamp, not its creation timestamp
                event_params['time_done'] = timestamp_to_datetime(fm_event['time_done'])
            event = FreshmakerEvent.create_or_update(event_params)[0]

            log.debug('Creating Advisory {0}'.format(fm_event['search_key']))
            advisory = Advisory.get_or_create(dict(
                id_=fm_event['search_key']
            ))[0]

            event.conditional_connect(event.triggered_by_advisory, advisory)

            for build_dict in fm_event['builds']:
                # To handle a faulty container build in Freshmaker
                if build_dict['build_id'] and int(build_dict['build_id']) < 0:
                    continue
                log.debug('Creating FreshmakerBuild {0}'.format(build_dict['build_id']))
                fb_params = dict(
                    id_=build_dict['id'],
                    dep_on=build_dict['dep_on'],
                    name=build_dict['name'],
                    original_nvr=build_dict['original_nvr'],
                    rebuilt_nvr=build_dict['rebuilt_nvr'],
                    state=build_dict['state'],
                    state_name=build_dict['state_name'],
                    state_reason=build_dict['state_reason'],
                    time_submitted=timestamp_to_datetime(build_dict['time_submitted']),
                    type_=build_dict['type'],
                    type_name=build_dict['type_name'],
                    url=build_dict['url']
                )
                if build_dict['time_completed']:
                    fb_params['time_completed'] = timestamp_to_datetime(
                        build_dict['time_completed'])
                if build_dict['build_id']:
                    fb_params['build_id'] = build_dict['build_id']
                fb = FreshmakerBuild.create_or_update(fb_params)[0]
                event.requested_builds.connect(fb)

                # The build ID obtained from Freshmaker API is actually a Koji task ID
                task_result = None
                if build_dict['build_id']:
                    task_result = self.get_koji_task_result(build_dict['build_id'])

                if not task_result:
                    continue

                # Extract the build ID from a task result
                xml_root = ET.fromstring(task_result)
                # TODO: Change this if a task can trigger multiple builds
                try:
                    build_id = xml_root.find(".//*[name='koji_builds'].//string").text
                except AttributeError:
                    # find() returned None, i.e. the task result lists no Koji build
                    build_id = None

                if not build_id:
                    continue

                log.debug('Creating ContainerKojiBuild {0}'.format(build_id))
                build_params = {
                    'id_': build_id,
                    'original_nvr': build_dict['original_nvr']
                }
                try:
                    build = ContainerKojiBuild.create_or_update(build_params)[0]
                except neomodel.exceptions.ConstraintValidationFailed:
                    # This must have errantly been created as a KojiBuild instead of a
                    # ContainerKojiBuild, so let's fix that.
                    build = KojiBuild.nodes.get_or_none(id_=build_id)
                    if not build:
                        # If there was a constraint validation failure and the build isn't just
                        # the wrong label, then we can't recover.
                        raise
                    build.add_label(ContainerKojiBuild.__label__)
                    build = ContainerKojiBuild.create_or_update(build_params)[0]

                event.successful_koji_builds.connect(build)

        if rv_json['meta'].get('next'):
            fm_url = rv_json['meta']['next']
        else:
            break
def update_neo4j(self, builds):
    """
    Update Neo4j with Koji build information from Teiid.

    For every build dictionary this creates or updates the build node (as a
    ContainerKojiBuild when one of the container heuristics matches, otherwise as a
    KojiBuild), connects its owner, synchronises its tag relationships, and then, when
    a task ID is available, uploads the related Koji task, its child tasks and the
    dist-git commit found in the task request.

    Builds without a task ID, or whose task cannot be found, are still uploaded; only the
    task-related steps are skipped for them.

    :param list builds: a list of dictionaries
    """
    # Uploads builds data to their respective nodes
    log.info('Beginning to upload data to Neo4j')
    count = 0

    for build_dict in builds:
        build_params = dict(
            id_=build_dict['id'],
            epoch=build_dict['epoch'],
            state=build_dict['state'],
            creation_time=build_dict['creation_time'],
            start_time=build_dict['start_time'],
            completion_time=build_dict['completion_time'],
            extra=build_dict['extra'],
            name=build_dict['package_name'],
            version=build_dict['version'],
            release=build_dict['release']
        )

        package_name = build_dict['package_name']
        # Parsed once and reused for both the container heuristic and the task lookup below
        try:
            extra_json = json.loads(build_dict['extra'])
        except (ValueError, TypeError):
            extra_json = {}

        container_build = False
        # Checking a heuristic for determining if a build is a container build since, currently
        # there is no definitive way to do it.
        if extra_json and extra_json.get('container_koji_build_id'):
            container_build = True
        # Checking another heuristic for determining if a build is a container build since
        # currently there is no definitive way to do it.
        elif package_name.endswith(('-container', '-docker')):
            container_build = True

        if container_build:
            build = ContainerKojiBuild.create_or_update(build_params)[0]
        else:
            build = KojiBuild.create_or_update(build_params)[0]

        if build_dict['owner_username']:
            username = build_dict['owner_username'].split('@')[0]
        else:
            username = build_dict['owner_name']
        user = User.get_or_create(dict(username=username))[0]
        build.conditional_connect(build.owner, user)

        tags = self.get_build_tags(build_dict['id'])
        current_tag_ids = set()
        for _tag in tags:
            current_tag_ids.add(_tag['tag_id'])
            tag = KojiTag.create_or_update(dict(
                id_=_tag['tag_id'],
                name=_tag['tag_name']
            ))[0]

            tag.builds.connect(build)

        # _tag.id_ must be cast as an int because it is stored as a string in Neo4j since
        # it's a UniqueIdProperty
        connected_tags = {int(_tag.id_): _tag for _tag in build.tags.all()}
        # Drop relationships to tags the build no longer carries
        extra_connected_tag_ids = set(connected_tags.keys()) - current_tag_ids
        for tag_id in extra_connected_tag_ids:
            build.tags.disconnect(connected_tags[tag_id])

        count += 1
        log.info('Uploaded {0} builds out of {1}'.format(count, len(builds)))

        container_koji_task_id = extra_json.get('container_koji_task_id')
        if build_dict['task_id']:
            task_id = build_dict['task_id']
        elif container_koji_task_id:
            task_id = container_koji_task_id
        else:
            # Continue if the task_id is None
            continue

        # Getting task related to the current build
        try:
            task_dict = self.get_task(task_id)[0]
        except IndexError:
            # Continue if no corresponding task found
            continue
        if not task_dict:
            # Continue if no corresponding task found; this must be checked before the
            # task's fields are read below
            continue

        xml_root = ET.fromstring(task_dict['request'])
        commit_hash = None
        for child in xml_root.iter('string'):
            if child.text and child.text.startswith('git'):
                # The commit hash is the fragment after the last '#' of the git URL
                commit_hash = child.text.rsplit('#', 1)[1]
                break

        task = KojiTask.create_or_update(dict(
            id_=task_dict['id'],
            weight=task_dict['weight'],
            create_time=task_dict['create_time'],
            start_time=task_dict['start_time'],
            completion_time=task_dict['completion_time'],
            state=task_dict['state'],
            priority=task_dict['priority'],
            arch=task_dict['arch'],
            method=task_dict['method']
        ))[0]

        # Defining Relationships
        task.builds.connect(build)
        task.conditional_connect(task.owner, user)
        if commit_hash:
            commit = DistGitCommit.get_or_create(dict(hash_=commit_hash))[0]
            build.conditional_connect(build.commit, commit)

        child_tasks = self.get_task_children(task_dict['id'])

        if not child_tasks:
            # Continue if no corresponding child task found
            continue

        for child_task_dict in child_tasks:
            child_task = KojiTask.create_or_update(dict(
                id_=child_task_dict['id'],
                weight=child_task_dict['weight'],
                create_time=child_task_dict['create_time'],
                start_time=child_task_dict['start_time'],
                completion_time=child_task_dict['completion_time'],
                state=child_task_dict['state'],
                priority=child_task_dict['priority'],
                arch=child_task_dict['arch'],
                method=child_task_dict['method']
            ))[0]
            child_task.conditional_connect(child_task.parent, task)
def get_or_create_build(self, identifier, original_nvr=None, force_container_label=False):
    """
    Get a Koji build from Neo4j, or create it if it does not exist in Neo4j.

    :param str/int identifier: an NVR (str) or build ID (int), or a dict of info from Koji API
    :kwarg str original_nvr: original_nvr property for the ContainerKojiBuild
    :kwarg bool force_container_label: when true, this skips the check to see if the build is a
        container and just creates the build with the ContainerKojiBuild label
    :rtype: KojiBuild
    :return: the Koji Build retrieved or created from Neo4j
    :raises Exception: whatever ``koji_session.getBuild`` raises when the identifier cannot be
        resolved; the failure is logged and re-raised unchanged
    :raises neomodel.exceptions.ConstraintValidationFailed: when creating a module build hits a
        constraint failure and no KojiBuild with the same ID exists to relabel
    """
    # isinstance (rather than an exact type check) so dict subclasses are also accepted
    if isinstance(identifier, dict):
        build_info = identifier
    else:
        try:
            build_info = self.koji_session.getBuild(identifier, strict=True)
        except Exception:
            log.error(
                'Failed to get brew build using the identifier {0}'.format(identifier))
            raise

    build_params = {
        'epoch': build_info['epoch'],
        'id_': str(build_info['id']),
        'name': build_info['package_name'],
        'release': build_info['release'],
        'state': build_info['state'],
        'version': build_info['version']
    }

    if build_info.get('extra'):
        build_params['extra'] = json.dumps(build_info['extra'])

    # To handle the case when a message has a null timestamp
    for time_key in ('completion_time', 'creation_time', 'start_time'):
        # Certain Koji API endpoints omit the *_ts values but have the *_time values, so that's
        # why the *_time values are used. A missing key is treated like a null timestamp.
        time_value = build_info.get(time_key)
        if not time_value:
            continue
        ts_format = r'%Y-%m-%d %H:%M:%S'
        if '.' in time_value:
            # If there are microseconds, go ahead and parse that too
            ts_format += r'.%f'
        build_params[time_key] = datetime.strptime(time_value, ts_format)

    owner = User.create_or_update({
        'username': build_info['owner_name'],
        'email': '{0}@redhat.com'.format(build_info['owner_name'])
    })[0]

    if force_container_label or self.is_container_build(build_info):
        if original_nvr:
            build_params['original_nvr'] = original_nvr
        build = ContainerKojiBuild.create_or_update(build_params)[0]
    elif self.is_module_build(build_info):
        module_extra_info = build_info['extra'].get('typeinfo', {}).get('module')
        build_params['context'] = module_extra_info.get('context')
        build_params['mbs_id'] = module_extra_info.get('module_build_service_id')
        build_params['module_name'] = module_extra_info.get('name')
        build_params['module_stream'] = module_extra_info.get('stream')
        build_params['module_version'] = module_extra_info.get('version')
        try:
            build = ModuleKojiBuild.create_or_update(build_params)[0]
        except neomodel.exceptions.ConstraintValidationFailed:
            # This must have errantly been created as a KojiBuild instead of a
            # ModuleKojiBuild, so let's fix that.
            build = KojiBuild.nodes.get_or_none(id_=build_params['id_'])
            if not build:
                # If there was a constraint validation failure and the build isn't just the
                # wrong label, then we can't recover.
                raise
            build.add_label(ModuleKojiBuild.__label__)
            build = ModuleKojiBuild.create_or_update(build_params)[0]
    else:
        build = KojiBuild.create_or_update(build_params)[0]

    build.conditional_connect(build.owner, owner)

    return build
def update_neo4j(self, builds):
    """
    Update Neo4j with Koji build information from Teiid.

    Each build is stored as a ContainerKojiBuild, ModuleKojiBuild or plain KojiBuild
    depending on ``is_container_build`` / ``is_module_build``. Its owner is connected, the
    components of a module build are connected and uploaded recursively, and the dist-git
    commit referenced by the build's Koji task is connected when one can be found.

    :param list builds: a list of dictionaries
    """
    def _upsert_with_label(model, params):
        # Create/update the node as ``model``. On a constraint failure the node is assumed
        # to exist as a plain KojiBuild, so it is relabelled and the write is retried.
        try:
            return model.create_or_update(params)[0]
        except neomodel.exceptions.ConstraintValidationFailed:
            existing = KojiBuild.nodes.get_or_none(id_=params['id_'])
            if not existing:
                # If there was a constraint validation failure and the build isn't just the
                # wrong label, then we can't recover.
                raise
            existing.add_label(model.__label__)
            return model.create_or_update(params)[0]

    # Uploads builds data to their respective nodes
    log.info('Beginning to upload data to Neo4j')
    uploaded = 0

    for build_dict in builds:
        build_params = {
            'id_': build_dict['id'],
            'epoch': build_dict['epoch'],
            'state': build_dict['state'],
            'creation_time': build_dict['creation_time'],
            'start_time': build_dict['start_time'],
            'completion_time': build_dict['completion_time'],
            'name': build_dict['package_name'],
            'version': build_dict['version'],
            'release': build_dict['release'],
        }

        try:
            extra_json = json.loads(build_dict['extra'])
        except (ValueError, TypeError):
            extra_json = {}

        if self.is_container_build(build_dict):
            type_info = extra_json.get('typeinfo', {})
            operator_manifests = type_info.get('operator-manifests', {})
            build_params['operator'] = bool(operator_manifests.get('archive'))
            build = _upsert_with_label(ContainerKojiBuild, build_params)
        elif self.is_module_build(build_dict):
            module_extra_info = extra_json.get('typeinfo', {}).get('module')
            build_params.update(
                context=module_extra_info.get('context'),
                mbs_id=module_extra_info.get('module_build_service_id'),
                module_name=module_extra_info.get('name'),
                module_stream=module_extra_info.get('stream'),
                module_version=module_extra_info.get('version'),
            )
            build = _upsert_with_label(ModuleKojiBuild, build_params)
        else:
            build = KojiBuild.create_or_update(build_params)[0]

        owner = User.get_or_create({'username': build_dict['owner_name']})[0]
        build.conditional_connect(build.owner, owner)

        if build.__label__ == ModuleKojiBuild.__label__:
            content_tag = module_extra_info.get('content_koji_tag')
            # Some modules don't have components
            components = self.get_tag_info(content_tag) if content_tag else None
            if components:
                for component in components:
                    component_node = KojiBuild.get_or_create({
                        'id_': component['build_id'],
                    })[0]
                    build.components.connect(component_node)

                component_ids = [component['build_id'] for component in components]
                # Upload the full details of every component build as well
                self.update_neo4j(self.get_build_info(component_ids))

        uploaded += 1
        log.info('Uploaded {0} builds out of {1}'.format(uploaded, len(builds)))

        # Prefer the build's own task ID and fall back on the one recorded in "extra"
        container_koji_task_id = extra_json.get('container_koji_task_id')
        task_id = build_dict['task_id'] or container_koji_task_id
        if not task_id:
            # Continue if the task_id is None
            continue

        # Getting task related to the current build
        try:
            task_dict = self.get_task(task_id)[0]
        except IndexError:
            continue

        # Only look for the commit hash if the build is an RPM or container
        if task_dict['method'] not in ('build', 'buildContainer'):
            continue

        request_root = ET.fromstring(task_dict['request'])
        # The first string in the request that starts with "git" carries the commit hash
        # after its last '#'
        commit_hash = next(
            (
                node.text.rsplit('#', 1)[1]
                for node in request_root.iter('string')
                if node.text and node.text.startswith('git')
            ),
            None,
        )

        if commit_hash:
            commit = DistGitCommit.get_or_create({'hash_': commit_hash})[0]
            build.conditional_connect(build.commit, commit)