def get_latest_comment_date(self, item):
    """Get the date of the latest comment on the issue/pr"""

    comment_dates = [str_to_datetime(comment['created_at'])
                     for comment in item['comments_data']]
    if comment_dates:
        return max(comment_dates)
    return None
def __add_milestone_info(self, item_data, eitem):
    """Add milestone tag, start date and due date to the enriched item"""

    eitem['milestone'] = NO_MILESTONE_TAG
    eitem['milestone_start_date'] = None
    eitem['milestone_due_date'] = None

    if 'milestone' in item_data and item_data['milestone']:
        eitem['milestone'] = item_data['milestone']['title']

        milestone = item_data['milestone']
        start_date_str = milestone.get('start_date', None)
        due_date_str = milestone.get('due_date', None)

        if start_date_str:
            eitem['milestone_start_date'] = str_to_datetime(start_date_str).replace(tzinfo=None).isoformat()
        if due_date_str:
            eitem['milestone_due_date'] = str_to_datetime(due_date_str).replace(tzinfo=None).isoformat()
def __fix_field_date(self, item, attribute):
    """Fix possible errors in the field date"""

    field_date = str_to_datetime(item[attribute])

    try:
        _ = int(field_date.strftime("%z")[0:3])
    except ValueError:
        logger.warning("%s in commit %s has a wrong format", attribute, item['commit'])
        item[attribute] = field_date.replace(tzinfo=None).isoformat()
def fix_field_date(date_value):
    """Fix possible errors in the field date"""

    field_date = str_to_datetime(date_value)

    try:
        _ = int(field_date.strftime("%z")[0:3])
    except ValueError:
        field_date = field_date.replace(tzinfo=None)

    return field_date.isoformat()
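# Usage sketch (not part of the original sources): it assumes str_to_datetime
# is the parser from grimoirelab_toolkit.datetime, as in the functions above.
# Dates whose UTC offset cannot be rendered through strftime("%z") hit the
# ValueError branch and come back as naive ISO 8601 strings; well-formed dates
# keep their offset.
normalized = fix_field_date("2019-02-25 05:55:26 +0000")
print(normalized)  # e.g. '2019-02-25T05:55:26+00:00'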
def get_time_to_merge_request_response(self, item):
    """Get the first date at which a review was made on the PR by someone
    other than the user who created the PR
    """
    review_dates = [str_to_datetime(review['created_at'])
                    for review in item['review_comments_data']
                    if item['user']['login'] != review['user']['login']]
    if review_dates:
        return min(review_dates)
    return None
def metadata_updated_on(item):
    """Extracts the update time from a Kitsune item.

    The timestamp is extracted from the 'updated' field and
    converted to a UNIX timestamp (float value).

    :param item: item generated by the backend

    :returns: a UNIX timestamp
    """
    return float(str_to_datetime(item['updated']).timestamp())
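# Hedged illustration (not from the original sources) of the conversion these
# metadata_updated_on() helpers share: str_to_datetime (assumed here to be
# grimoirelab_toolkit.datetime.str_to_datetime) parses the date string into an
# aware datetime, and .timestamp() turns it into a UNIX epoch float.
from grimoirelab_toolkit.datetime import str_to_datetime

item = {'updated': '2018-02-28T02:36:25Z'}  # hypothetical backend item
updated_on = float(str_to_datetime(item['updated']).timestamp())
print(updated_on)  # 1519785385.0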
def get_grimoire_fields(self, creation_date, item_name):
    """Return common grimoire fields for all data sources"""

    grimoire_date = None
    try:
        grimoire_date = str_to_datetime(creation_date).isoformat()
    except Exception:
        pass

    name = "is_" + self.get_connector_name() + "_" + item_name

    return {
        "grimoire_creation_date": grimoire_date,
        name: 1
    }
def metadata_updated_on(item):
    """Extracts the update time from a Rocket.Chat item.

    The timestamp is extracted from the '_updatedAt' field and
    converted into a UNIX timestamp.

    :param item: item generated by the backend

    :returns: extracted timestamp
    """
    ts = str_to_datetime(item['_updatedAt']).timestamp()
    return ts
def get_time_to_first_attention(self, item):
    """Get the first date at which a comment was made to the issue by someone
    other than the user who created the issue
    """
    comment_dates = [str_to_datetime(comment['created_at'])
                     for comment in item['comments_data']
                     if item['user']['login'] != comment['user']['login']]
    if comment_dates:
        return min(comment_dates)
    return None
def get_time_first_review(self, review):
    """Get the first date at which a review was made on the changeset by someone
    other than the user who created the changeset
    """
    changeset_owner = review.get('owner', None)
    changeset_owner_username = changeset_owner.get('username', None) if changeset_owner else None
    changeset_owner_email = changeset_owner.get('email', None) if changeset_owner else None
    changeset_created_on = str_to_datetime(review['createdOn']).isoformat()

    first_review = None
    patchsets = review.get('patchSets', [])
    for patchset in patchsets:
        approvals = patchset.get('approvals', [])
        for approval in approvals:
            if approval['type'] != CODE_REVIEW_TYPE:
                continue

            approval_granted_on = str_to_datetime(approval['grantedOn']).isoformat()
            if approval_granted_on < changeset_created_on:
                continue

            approval_by = approval.get('by', None)
            approval_by_username = approval_by.get('username', None) if approval_by else None
            approval_by_email = approval_by.get('email', None) if approval_by else None

            if approval_by_username and changeset_owner_username:
                first_review = approval['grantedOn'] if approval_by_username != changeset_owner_username else None
            elif approval_by_email and changeset_owner_email:
                first_review = approval['grantedOn'] if approval_by_email != changeset_owner_email else None
            else:
                # if changeset_owner or approval_by is None
                first_review = approval['grantedOn']

            if first_review:
                return first_review

    return first_review
def fetch_items(self, category, **kwargs):
    """Fetch the tweets

    :param category: the category of items to fetch
    :param kwargs: backend arguments

    :returns: a generator of items
    """
    since_id = kwargs['since_id']
    max_id = kwargs['max_id']
    geocode = kwargs['geocode']
    lang = kwargs['lang']
    entities = kwargs['include_entities']
    tweets_type = kwargs['result_type']

    logger.info("Fetching tweets %s from %s to %s",
                self.query, str(since_id),
                str(max_id) if max_id else '--')

    tweets_ids = []
    min_date = None
    max_date = None

    group_tweets = self.client.tweets(self.query, since_id=since_id, max_id=max_id,
                                      geocode=geocode, lang=lang,
                                      include_entities=entities, result_type=tweets_type)

    for tweets in group_tweets:
        for i in range(len(tweets)):
            tweet = tweets[i]
            tweets_ids.append(tweet['id'])

            if tweets[-1] == tweet:
                min_date = str_to_datetime(tweets[-1]['created_at'])

            if tweets[0] == tweet and not max_date:
                max_date = str_to_datetime(tweets[0]['created_at'])

            yield tweet

    logger.info("Fetch process completed: %s (unique %s) tweets fetched, from %s to %s",
                len(tweets_ids), len(list(set(tweets_ids))), min_date, max_date)
def test_archive(self):
    """Test whether a set of items is fetched from the archive"""

    manager = ArchiveManager(self.test_path)

    category = 'mock_item'
    args = {
        'origin': 'http://example.com/',
        'tag': 'test',
        'subtype': 'mocksubtype',
        'from-date': str_to_datetime('2015-01-01')
    }

    # First, fetch the items twice to check whether several archives
    # are used
    items = fetch(CommandBackend, args, category, manager=manager)
    items = [item for item in items]
    self.assertEqual(len(items), 5)

    items = fetch(CommandBackend, args, category, manager=manager)
    items = [item for item in items]
    self.assertEqual(len(items), 5)

    # Fetch items from the archive
    items = fetch_from_archive(CommandBackend, args, manager, category,
                               str_to_datetime('1970-01-01'))
    items = [item for item in items]
    self.assertEqual(len(items), 10)

    for x in range(2):
        for y in range(5):
            item = items[y + (x * 5)]
            expected_uuid = uuid('http://example.com/', str(y))

            self.assertEqual(item['data']['item'], y)
            self.assertEqual(item['data']['archive'], True)
            self.assertEqual(item['origin'], 'http://example.com/')
            self.assertEqual(item['uuid'], expected_uuid)
            self.assertEqual(item['tag'], 'test')
def archived_after(self, value):
    if value is None:
        self._archived_after = None
    elif isinstance(value, datetime.datetime):
        self._archived_after = datetime_to_utc(value)
    elif isinstance(value, str):
        try:
            self._archived_after = str_to_datetime(value)
        except InvalidDateError as e:
            raise ValueError("'archived_after' is invalid; %s" % str(e))
    else:
        raise ValueError("'archived_after' must be either a str or a datetime; %s given"
                         % str(type(value)))
def test_remove_archive_on_error(self):
    """Test whether an archive is removed when an unhandled exception occurs"""

    manager = ArchiveManager(self.test_path)

    category = 'mock_item'
    args = {
        'origin': 'http://example.com/',
        'tag': 'test',
        'subtype': 'mocksubtype',
        'from-date': str_to_datetime('2015-01-01')
    }

    items = fetch(ErrorCommandBackend, args, category, manager=manager)

    with self.assertRaises(BackendError):
        _ = [item for item in items]

    filepaths = manager.search('http://example.com/', 'ErrorCommandBackend',
                               'mock_item', str_to_datetime('1970-01-01'))
    self.assertEqual(len(filepaths), 0)
def metadata_updated_on(item):
    """Extracts the update time from a MozillaClub item.

    The timestamp is extracted from the 'updated' field. This date is
    in ISO format and it needs to be converted to a float value
    (UNIX timestamp).

    :param item: item generated by the backend

    :returns: a UNIX timestamp
    """
    date = str_to_datetime(item['updated'])
    return float(date.timestamp())
def test_no_archived_items(self):
    """Test when no archived items are available"""

    manager = ArchiveManager(self.test_path)

    category = 'mock_item'
    args = {
        'origin': 'http://example.com/',
        'tag': 'test',
        'subtype': 'mocksubtype',
        'from-date': str_to_datetime('2015-01-01')
    }

    items = fetch(CommandBackend, args, category, manager=manager)
    items = [item for item in items]
    self.assertEqual(len(items), 5)

    # There are no items for this category
    items = fetch_from_archive(CommandBackend, args, manager, 'alt_item',
                               str_to_datetime('1970-01-01'))
    items = [item for item in items]
    self.assertEqual(len(items), 0)
def __get_next_event(self, event_fields):
    # Fill the empty event with all fields as None
    event = {key: None for key in event_fields.values()}
    event['updated'] = DEFAULT_DATETIME.isoformat()

    last_col = 0
    while self.ncell < len(self.cells):
        # Get all cols (cells) for the event (row)
        cell = self.cells[self.ncell]
        ncol = int(cell['gs$cell']['col'])
        if ncol <= last_col:
            # new event (row) detected: new cell column lower than last
            break
        event[event_fields[ncol]] = cell['content']['$t']
        # Add an extra column with the update datetime
        cell_update = str_to_datetime(cell['updated']['$t'])
        if cell_update > str_to_datetime(event['updated']):
            event['updated'] = cell['updated']['$t']
        last_col = ncol
        self.ncell += 1

    return event
def get_ocean_backend(backend_cmd, enrich_backend, no_incremental, filter_raw=None):
    """Get the ocean backend configured to start from the last enriched date"""

    if no_incremental:
        last_enrich = None
    else:
        last_enrich = get_last_enrich(backend_cmd, enrich_backend, filter_raw=filter_raw)

    logger.debug("Last enrichment: {}".format(last_enrich))

    backend = None

    connector = get_connectors()[enrich_backend.get_connector_name()]

    if backend_cmd:
        backend_cmd = init_backend(backend_cmd)
        backend = backend_cmd.backend

        signature = inspect.signature(backend.fetch)
        if 'from_date' in signature.parameters:
            ocean_backend = connector[1](backend, from_date=last_enrich)
        elif 'offset' in signature.parameters:
            ocean_backend = connector[1](backend, offset=last_enrich)
        else:
            if last_enrich:
                ocean_backend = connector[1](backend, from_date=last_enrich)
            else:
                ocean_backend = connector[1](backend)
    else:
        # We can have params for non perceval backends also
        params = enrich_backend.backend_params
        if params:
            try:
                date_pos = params.index('--from-date')
                last_enrich = str_to_datetime(params[date_pos + 1])
            except ValueError:
                pass
        if last_enrich:
            ocean_backend = connector[1](backend, from_date=last_enrich)
        else:
            ocean_backend = connector[1](backend)

    if filter_raw:
        ocean_backend.set_filter_raw(filter_raw)

    return ocean_backend
def parse(self, *args):
    """Parse a list of arguments.

    Parse argument strings needed to run a backend command. The result
    will be an `argparse.Namespace` object populated with the values
    obtained after the validation of the parameters.

    :param args: argument strings

    :result: an object with the parsed values
    """
    parsed_args = self.parser.parse_args(args)

    # Category was not set, remove it
    if parsed_args.category is None:
        delattr(parsed_args, 'category')

    if self._from_date:
        parsed_args.from_date = str_to_datetime(parsed_args.from_date)

    if self._to_date and parsed_args.to_date:
        parsed_args.to_date = str_to_datetime(parsed_args.to_date)

    if self._archive and parsed_args.archived_since:
        parsed_args.archived_since = str_to_datetime(parsed_args.archived_since)

    if self._archive and parsed_args.fetch_archive and parsed_args.no_archive:
        raise AttributeError("fetch-archive and no-archive arguments are not compatible")

    if self._archive and parsed_args.fetch_archive and not parsed_args.category:
        raise AttributeError("fetch-archive needs a category to work with")

    # Set aliases
    for alias, arg in self.aliases.items():
        if (alias not in parsed_args) and (arg in parsed_args):
            value = getattr(parsed_args, arg, None)
            setattr(parsed_args, alias, value)

    return parsed_args
def metadata_updated_on(item):
    """Extracts and converts the update time from a Confluence item.

    The timestamp is extracted from the 'when' field of the 'version'
    section. This date is converted to UNIX timestamp format.

    :param item: item generated by the backend

    :returns: a UNIX timestamp
    """
    ts = item['version']['when']
    ts = str_to_datetime(ts)

    return ts.timestamp()
def parse(self, *args):
    """Parse a list of arguments.

    Parse argument strings needed to run a backend command. The result
    will be an `argparse.Namespace` object populated with the values
    obtained after the validation of the parameters.

    :param args: argument strings

    :result: an object with the parsed values
    """
    parsed_args = self.parser.parse_args(args)

    # Category was not set, remove it
    if parsed_args.category is None:
        delattr(parsed_args, 'category')

    if self._from_date:
        parsed_args.from_date = str_to_datetime(parsed_args.from_date)

    if self._to_date and parsed_args.to_date:
        parsed_args.to_date = str_to_datetime(parsed_args.to_date)

    return parsed_args
def metadata_updated_on(item):
    """Extracts the update time from an RSS item.

    The timestamp is extracted from the 'published' field. This date is
    a datetime string that needs to be converted to a UNIX timestamp
    float value.

    :param item: item generated by the backend

    :returns: a UNIX timestamp
    """
    ts = str_to_datetime(item['published'])

    return ts.timestamp()
def metadata_updated_on(item):
    """Extracts and converts the sent time of a message from a Gitter item.

    The timestamp is extracted from the 'sent' field and converted to a
    UNIX timestamp.

    :param item: item generated by the backend

    :returns: a UNIX timestamp
    """
    ts = str_to_datetime(item['sent'])

    return ts.timestamp()
def metadata_updated_on(item):
    """Extracts and converts the update time from a Redmine item.

    The timestamp is extracted from the 'updated_on' field and converted
    to a UNIX timestamp.

    :param item: item generated by the backend

    :returns: a UNIX timestamp
    """
    ts = item['updated_on']
    ts = str_to_datetime(ts)

    return ts.timestamp()
def __add_commit_info(self, df_columns, item):
    commit_data = item["data"]
    repository = item["origin"]
    creation_date = str_to_datetime(commit_data['AuthorDate'])

    df_columns[Git.COMMIT_HASH].append(commit_data['commit'])
    df_columns[Git.COMMIT_ID].append(commit_data['commit'])
    df_columns[Git.COMMIT_EVENT].append(Git.EVENT_COMMIT)
    df_columns[Git.COMMIT_DATE].append(creation_date)
    df_columns[Git.COMMIT_OWNER].append(commit_data['Author'])
    df_columns[Git.COMMIT_COMMITTER].append(commit_data['Commit'])
    df_columns[Git.COMMIT_COMMITTER_DATE].append(str_to_datetime(commit_data['CommitDate']))
    df_columns[Git.COMMIT_REPOSITORY].append(repository)

    if 'message' in commit_data.keys():
        df_columns[Git.COMMIT_MESSAGE].append(commit_data['message'])
    else:
        df_columns[Git.COMMIT_MESSAGE].append('')

    author_domain = self.enrich.get_identity_domain(self.enrich.get_sh_identity(item, 'Author'))
    df_columns[Git.AUTHOR_DOMAIN].append(author_domain)
def get_time_first_review_patchset(self, patchset):
    """Get the first date at which a review was made on the patchset by someone
    other than the user who created the patchset
    """
    patchset_author = patchset.get('author', None)
    patchset_author_username = patchset_author.get('username', None) if patchset_author else None
    patchset_author_email = patchset_author.get('email', None) if patchset_author else None
    patchset_created_on = str_to_datetime(patchset['createdOn']).isoformat()

    first_review = None
    approvals = patchset.get('approvals', [])
    for approval in approvals:
        if approval['type'] != CODE_REVIEW_TYPE:
            continue

        approval_granted_on = str_to_datetime(approval['grantedOn']).isoformat()
        if approval_granted_on < patchset_created_on:
            continue

        approval_by = approval.get('by', None)
        approval_by_username = approval_by.get('username', None) if approval_by else None
        approval_by_email = approval_by.get('email', None) if approval_by else None

        if approval_by_username and patchset_author_username:
            first_review = approval['grantedOn'] if approval_by_username != patchset_author_username else None
        elif approval_by_email and patchset_author_email:
            first_review = approval['grantedOn'] if approval_by_email != patchset_author_email else None
        else:
            # if patchset_author or approval_by is None
            first_review = approval['grantedOn']

        if first_review:
            break

    return first_review
def get_rich_comment(self, item, answer, comment):
    ecomment = self.get_rich_item(item)  # reuse all fields from item
    ecomment['id'] = str(ecomment['id']) + '_' + str(answer['id']) + '_' + str(comment['id'])
    ecomment['url'] = item['data']['url'] + "/?answer="
    ecomment['url'] += answer['id'] + '#post-id-' + answer['id']
    if 'author' in comment:
        # Not sure if this format is present in some version of askbot
        ecomment['author_askbot_user_name'] = comment['author']['username']
        ecomment['author_askbot_id'] = str(comment['author']['id'])
        ecomment['author_url'] = ecomment['origin'] + '/users/'
        ecomment['author_url'] += comment['author']['id'] + '/' + comment['author']['username']
    elif 'user_display_name' in comment:
        ecomment['author_askbot_user_name'] = comment['user_display_name']
        ecomment['author_askbot_id'] = str(comment['user_id'])
    if 'summary' in comment:
        ecomment['summary'] = comment['summary']
    ecomment['score'] = int(comment['score']) if comment['score'] else 0

    dfield = 'added_at'
    if 'comment_added_at' in comment:
        dfield = 'comment_added_at'

    if self.sortinghat:
        if dfield == 'added_at':
            comment['added_at_date'] = unixtime_to_datetime(float(comment[dfield])).isoformat()
        else:
            comment['added_at_date'] = comment[dfield]
        ecomment.update(self.get_item_sh(comment, date_field="added_at_date"))
        if ecomment['author_user_name'] != ecomment['author_askbot_user_name']:
            logger.warning('Bad SH identity in askbot comment. Found %s expecting %s',
                           ecomment['author_user_name'], ecomment['author_askbot_user_name'])

    if dfield == 'added_at':
        comment_at = unixtime_to_datetime(float(comment[dfield]))
    else:
        comment_at = str_to_datetime(comment[dfield])

    added_at = unixtime_to_datetime(float(item['data']["added_at"]))
    ecomment['time_from_question'] = get_time_diff_days(added_at, comment_at)

    ecomment['type'] = 'comment'
    ecomment.update(self.get_grimoire_fields(comment_at.isoformat(), ecomment['type']))

    # Clean item fields that are not valid in comments
    for f in ['is_askbot_question', 'author_reputation', 'author_badges', 'is_correct', 'comment_count']:
        if f in ecomment:
            ecomment.pop(f)

    return ecomment
def _fetch_and_parse_messages(self, mailing_list, from_date):
    """Fetch and parse the messages from a mailing list"""

    from_date = datetime_to_utc(from_date)

    nmsgs, imsgs, tmsgs = (0, 0, 0)

    for mbox in mailing_list.mboxes:
        tmp_path = None

        try:
            tmp_path = self._copy_mbox(mbox)

            for message in self.parse_mbox(tmp_path):
                tmsgs += 1

                if not self._validate_message(message):
                    imsgs += 1
                    continue

                # Ignore those messages sent before the given date
                dt = str_to_datetime(message[MBox.DATE_FIELD])

                if dt < from_date:
                    logger.debug("Message %s sent before %s; skipped",
                                 message['unixfrom'], str(from_date))
                    tmsgs -= 1
                    continue

                # Convert 'CaseInsensitiveDict' to dict
                message = self._casedict_to_dict(message)

                nmsgs += 1
                logger.debug("Message %s parsed", message['unixfrom'])

                yield message
        except (OSError, EOFError) as e:
            logger.warning("Ignoring %s mbox due to: %s", mbox.filepath, str(e))
        except Exception as e:
            if tmp_path and os.path.exists(tmp_path):
                os.remove(tmp_path)
            raise e
        finally:
            if tmp_path and os.path.exists(tmp_path):
                os.remove(tmp_path)

    logger.info("Done. %s/%s messages fetched; %s ignored",
                nmsgs, tmsgs, imsgs)
def metadata_updated_on(item):
    """Extracts the update time from a GitLab item.

    The timestamp used is extracted from the 'updated_at' field and
    converted to UNIX timestamp format. As GitLab dates are in UTC,
    the conversion is straightforward.

    :param item: item generated by the backend

    :returns: a UNIX timestamp
    """
    ts = item['updated_at']
    ts = str_to_datetime(ts)

    return ts.timestamp()
def metadata_updated_on(item):
    """Extracts the update time from a Discourse item.

    The timestamp used is extracted from the 'last_posted_at' field.
    This date is converted to UNIX timestamp format, taking into
    account the timezone of the date.

    :param item: item generated by the backend

    :returns: a UNIX timestamp
    """
    ts = item['last_posted_at']
    ts = str_to_datetime(ts)

    return ts.timestamp()
def __fetch_buglist(self, from_date):
    buglist = self.__fetch_and_parse_buglist_page(from_date)

    while buglist:
        bug = buglist.pop(0)
        last_date = bug['changeddate']
        yield bug

        # Bugzilla does not support pagination. Due to this,
        # the next list of bugs is requested adding one second
        # to the last date obtained.
        if not buglist:
            from_date = str_to_datetime(last_date)
            from_date += datetime.timedelta(seconds=1)
            buglist = self.__fetch_and_parse_buglist_page(from_date)
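# Hedged illustration (not part of the original sources) of the pagination
# workaround in __fetch_buglist: the next page is requested from the change
# date of the last bug already fetched plus one second, so that bug is not
# returned again. str_to_datetime is assumed to be the grimoirelab_toolkit
# parser used by the functions above.
import datetime
from grimoirelab_toolkit.datetime import str_to_datetime

last_date = '2015-08-12 18:32:11'  # hypothetical 'changeddate' of the last bug
next_from_date = str_to_datetime(last_date) + datetime.timedelta(seconds=1)
print(next_from_date.isoformat())  # e.g. '2015-08-12T18:32:12+00:00'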
def _add_metadata(self, df_columns, item):
    metadata__timestamp = item["metadata__timestamp"]
    metadata__updated_on = item["metadata__updated_on"]
    metadata__enriched_on = dt.utcnow().isoformat()

    df_columns[Events.META_TIMESTAMP].append(metadata__timestamp)
    df_columns[Events.META_UPDATED_ON].append(metadata__updated_on)
    df_columns[Events.META_ENRICHED_ON].append(metadata__enriched_on)

    # If called after '__add_sh_info', item will already contain
    # 'grimoire_creation_date'
    if Events.GRIMOIRE_CREATION_DATE in item:
        creation_date = item[Events.GRIMOIRE_CREATION_DATE]
    else:
        creation_date = str_to_datetime(item['data']['AuthorDate'])
    df_columns[Events.GRIMOIRE_CREATION_DATE].append(creation_date)

    # Perceval fields
    df_columns[Events.PERCEVAL_UUID].append(item['uuid'])
def eventize(self, granularity):
    """This splits the JSON information found at self.events into the
    several events. For this there are three different levels of time
    consuming actions: 1-soft, 2-medium and 3-hard.

    Level 1 provides events about emails
    Level 2 not implemented
    Level 3 not implemented

    :param granularity: Levels of time consuming actions to calculate events
    :type granularity: integer
    :returns: Pandas dataframe with split events
    :rtype: pandas.DataFrame
    """
    email = {}

    # First level granularity
    email[Email.EMAIL_ID] = []
    email[Email.EMAIL_EVENT] = []
    email[Email.EMAIL_DATE] = []
    email[Email.EMAIL_OWNER] = []
    email[Email.EMAIL_SUBJECT] = []
    email[Email.EMAIL_BODY] = []
    email[Email.EMAIL_ORIGIN] = []

    events = pandas.DataFrame()

    for item in self.items:
        origin = item["origin"]
        email_data = item["data"]
        if granularity == 1:
            # Email submission date: filling a new event
            email[Email.EMAIL_ID].append(email_data["Message-ID"])
            email[Email.EMAIL_EVENT].append(Email.EVENT_OPEN)
            try:
                email[Email.EMAIL_DATE].append(str_to_datetime(email_data["Date"], ignoretz=True))
            except KeyError:
                email[Email.EMAIL_DATE].append(str_to_datetime("1970-01-01"))
            email[Email.EMAIL_OWNER].append(email_data["From"])
            email[Email.EMAIL_SUBJECT].append(email_data["Subject"])
            try:
                email[Email.EMAIL_BODY].append(email_data["body"]["plain"])
            except KeyError:
                email[Email.EMAIL_BODY].append("None")
            email[Email.EMAIL_ORIGIN].append(origin)

        if granularity == 2:
            # TBD
            pass

        if granularity == 3:
            # TBD
            pass

    # Done in this way to have an order (and not a direct cast)
    events[Email.EMAIL_ID] = email[Email.EMAIL_ID]
    events[Email.EMAIL_EVENT] = email[Email.EMAIL_EVENT]
    events[Email.EMAIL_DATE] = email[Email.EMAIL_DATE]
    events[Email.EMAIL_OWNER] = email[Email.EMAIL_OWNER]
    events[Email.EMAIL_SUBJECT] = email[Email.EMAIL_SUBJECT]
    events[Email.EMAIL_BODY] = email[Email.EMAIL_BODY]
    events[Email.EMAIL_ORIGIN] = email[Email.EMAIL_ORIGIN]

    return events
def eventize(self, granularity):
    """This splits the JSON information found at self.events into the
    several events. For this there are three different levels of time
    consuming actions: 1-soft, 2-medium and 3-hard.

    Level 1 provides events about open and closed issues.
    Level 2 provides events about the rest of the status updates.
    Level 3 provides events about the rest of the values in any of the fields.

    :param granularity: Levels of time consuming actions to calculate events
    :type granularity: integer
    :returns: Pandas dataframe with split events
    :rtype: pandas.DataFrame
    """
    issue = {}

    issue[BugzillaRest.ISSUE_ID] = []
    issue[BugzillaRest.ISSUE_EVENT] = []
    issue[BugzillaRest.ISSUE_DATE] = []
    issue[BugzillaRest.ISSUE_OWNER] = []
    issue[BugzillaRest.ISSUE_ADDED] = []
    issue[BugzillaRest.ISSUE_REMOVED] = []

    events = pandas.DataFrame()

    for item in self.items:
        bug_data = item["data"]
        if granularity == 1:
            # Open Date: filling a new event
            issue[BugzillaRest.ISSUE_ID].append(bug_data['id'])
            issue[BugzillaRest.ISSUE_EVENT].append(BugzillaRest.EVENT_OPEN)
            issue[BugzillaRest.ISSUE_DATE].append(str_to_datetime(bug_data['creation_time']))
            issue[BugzillaRest.ISSUE_OWNER].append(bug_data['creator_detail']["real_name"])
            issue[BugzillaRest.ISSUE_ADDED].append("-")
            issue[BugzillaRest.ISSUE_REMOVED].append("-")

            # Adding the rest of the status updates (if there were any)
            if 'history' in bug_data.keys():
                history = bug_data["history"]
                for step in history:
                    # Filling a new event
                    who = step["who"]
                    when = str_to_datetime(step["when"])
                    changes = step["changes"]
                    for change in changes:
                        issue[BugzillaRest.ISSUE_ID].append(bug_data['id'])
                        issue[BugzillaRest.ISSUE_EVENT].append("ISSUE_" + change["field_name"])
                        issue[BugzillaRest.ISSUE_ADDED].append(change["added"])
                        issue[BugzillaRest.ISSUE_REMOVED].append(change["removed"])
                        issue[BugzillaRest.ISSUE_DATE].append(when)
                        issue[BugzillaRest.ISSUE_OWNER].append(who)

        if granularity == 2:
            # TBD: produce an index with all of the changes.
            # Keep in mind the point about having the changes that
            # initiate the ticket.
            pass

        if granularity == 3:
            # TBD
            pass

    # Done in this way to have an order (and not a direct cast)
    events[BugzillaRest.ISSUE_ID] = issue[BugzillaRest.ISSUE_ID]
    events[BugzillaRest.ISSUE_EVENT] = issue[BugzillaRest.ISSUE_EVENT]
    events[BugzillaRest.ISSUE_DATE] = issue[BugzillaRest.ISSUE_DATE]
    events[BugzillaRest.ISSUE_OWNER] = issue[BugzillaRest.ISSUE_OWNER]
    events[BugzillaRest.ISSUE_ADDED] = issue[BugzillaRest.ISSUE_ADDED]
    events[BugzillaRest.ISSUE_REMOVED] = issue[BugzillaRest.ISSUE_REMOVED]

    return events
def eventize(self, granularity):
    """This splits the JSON information found at self.events into the
    several events. For this there are three different levels of time
    consuming actions: 1-soft, 2-medium and 3-hard.

    Level 1 provides events about open and closed issues.
    Level 2 provides events about the rest of the status updates.
    Level 3 provides events about the rest of the values in any of the fields.

    :param granularity: Levels of time consuming actions to calculate events
    :type granularity: integer
    :returns: Pandas dataframe with split events
    :rtype: pandas.DataFrame
    """
    issue = {}

    issue[Bugzilla.ISSUE_ID] = []
    issue[Bugzilla.ISSUE_EVENT] = []
    issue[Bugzilla.ISSUE_DATE] = []
    issue[Bugzilla.ISSUE_OWNER] = []

    events = pandas.DataFrame()

    for item in self.items:
        bug_data = item["data"]
        if granularity == 1:
            # Open Date: filling a new event
            issue[Bugzilla.ISSUE_ID].append(bug_data['bug_id'][0]['__text__'])
            issue[Bugzilla.ISSUE_EVENT].append(Bugzilla.EVENT_OPEN)
            issue[Bugzilla.ISSUE_DATE].append(str_to_datetime(bug_data['creation_ts'][0]['__text__']))
            issue[Bugzilla.ISSUE_OWNER].append(bug_data['reporter'][0]["__text__"])

            # Adding the rest of the status updates (if there were any)
            if 'activity' in bug_data.keys():
                activity = bug_data["activity"]
                for change in activity:
                    # if change["What"] == "Status":
                    # Filling a new event
                    issue[Bugzilla.ISSUE_ID].append(bug_data['bug_id'][0]['__text__'])
                    issue[Bugzilla.ISSUE_EVENT].append("ISSUE_" + change["Added"])
                    issue[Bugzilla.ISSUE_DATE].append(str_to_datetime(change["When"]))
                    issue[Bugzilla.ISSUE_OWNER].append(change["Who"])

        if granularity == 2:
            # TBD: produce an index with all of the changes.
            # Keep in mind the point about having the changes that
            # initiate the ticket.
            pass

        if granularity == 3:
            # TBD
            pass

    # Done in this way to have an order (and not a direct cast)
    events[Bugzilla.ISSUE_ID] = issue[Bugzilla.ISSUE_ID]
    events[Bugzilla.ISSUE_EVENT] = issue[Bugzilla.ISSUE_EVENT]
    events[Bugzilla.ISSUE_DATE] = issue[Bugzilla.ISSUE_DATE]
    events[Bugzilla.ISSUE_OWNER] = issue[Bugzilla.ISSUE_OWNER]

    return events