Example #1
    def get_latest_comment_date(self, item):
        """Get the date of the latest comment on the issue/pr"""

        comment_dates = [
            str_to_datetime(comment['created_at'])
            for comment in item['comments_data']
        ]
        if comment_dates:
            return max(comment_dates)
        return None
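A minimal usage sketch for the method above, assuming str_to_datetime comes from grimoirelab-toolkit and inventing a small item payload for illustration:

from grimoirelab_toolkit.datetime import str_to_datetime

item = {
    'comments_data': [
        {'created_at': '2023-01-10T12:00:00Z'},
        {'created_at': '2023-03-02T08:30:00Z'},
    ]
}
comment_dates = [str_to_datetime(c['created_at'])
                 for c in item['comments_data']]
latest = max(comment_dates) if comment_dates else None
print(latest)  # 2023-03-02 08:30:00+00:00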
Example #2
    def __add_milestone_info(self, item_data, eitem):
        """Add milestone tag, start date and due date to the enriched item"""

        eitem['milestone'] = NO_MILESTONE_TAG
        eitem['milestone_start_date'] = None
        eitem['milestone_due_date'] = None

        if 'milestone' in item_data and item_data['milestone']:
            eitem['milestone'] = item_data['milestone']['title']

            milestone = item_data['milestone']

            start_date_str = milestone.get('start_date', None)
            due_date_str = milestone.get('due_date', None)

            if start_date_str:
                eitem['milestone_start_date'] = str_to_datetime(start_date_str).replace(tzinfo=None).isoformat()

            if due_date_str:
                eitem['milestone_due_date'] = str_to_datetime(due_date_str).replace(tzinfo=None).isoformat()
Example #3
    def __fix_field_date(self, item, attribute):
        """Fix possible errors in the field date"""

        field_date = str_to_datetime(item[attribute])

        try:
            # strftime("%z") yields an empty string when the date has no
            # usable UTC offset, so the int() conversion raises ValueError
            _ = int(field_date.strftime("%z")[0:3])
        except ValueError:
            logger.warning("%s in commit %s has a wrong format", attribute,
                           item['commit'])
            item[attribute] = field_date.replace(tzinfo=None).isoformat()
Example #4
def fix_field_date(date_value):
    """Fix possible errors in the field date"""

    field_date = str_to_datetime(date_value)

    try:
        _ = int(field_date.strftime("%z")[0:3])
    except ValueError:
        field_date = field_date.replace(tzinfo=None)

    return field_date.isoformat()
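A stdlib-only sketch of the %z probe that both fix_field_date helpers above rely on: a datetime without a usable UTC offset renders %z as an empty string, so the int() conversion raises ValueError and the timezone is dropped.

from datetime import datetime, timedelta, timezone

aware = datetime(2020, 5, 1, tzinfo=timezone(timedelta(hours=2)))
naive = datetime(2020, 5, 1)

print(int(aware.strftime('%z')[0:3]))  # 2 -> offset is parseable
print(repr(naive.strftime('%z')))      # '' -> int('') raises ValueError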
Example #5
    def get_time_to_merge_request_response(self, item):
        """Get the first date at which a review was made on the PR by someone
        other than the user who created the PR
        """
        review_dates = [
            str_to_datetime(review['created_at'])
            for review in item['review_comments_data']
            if item['user']['login'] != review['user']['login']
        ]
        if review_dates:
            return min(review_dates)
        return None
Example #6
    def metadata_updated_on(item):
        """Extracts the update time from a Kitsune item.

        The timestamp is extracted from 'updated' field.
        This date is a UNIX timestamp but needs to be converted to
        a float value.

        :param item: item generated by the backend

        :returns: a UNIX timestamp
        """
        return float(str_to_datetime(item['updated']).timestamp())
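The metadata_updated_on variants in this collection all follow the same pattern: parse a date string and return its UNIX timestamp. A hypothetical round trip, assuming str_to_datetime from grimoirelab-toolkit and an invented payload:

from grimoirelab_toolkit.datetime import str_to_datetime

item = {'updated': '2019-07-01T09:15:00+00:00'}
ts = float(str_to_datetime(item['updated']).timestamp())
print(ts)  # 1561972500.0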
Example #7
    def get_grimoire_fields(self, creation_date, item_name):
        """ Return common grimoire fields for all data sources """

        grimoire_date = None
        try:
            grimoire_date = str_to_datetime(creation_date).isoformat()
        except Exception:
            # Keep grimoire_date as None when the creation date
            # cannot be parsed
            pass

        name = "is_" + self.get_connector_name() + "_" + item_name

        return {"grimoire_creation_date": grimoire_date, name: 1}
Example #8
    def metadata_updated_on(item):
        """Extracts the update time from a Rocket.Chat item.

        The timestamp is extracted from 'ts' field,
        and then converted into a UNIX timestamp.

        :param item: item generated by the backend

        :returns: extracted timestamp
        """
        ts = str_to_datetime(item['_updatedAt']).timestamp()
        return ts
Example #9
    def get_time_to_first_attention(self, item):
        """Get the first date at which a comment was made to the issue by someone
        other than the user who created the issue
        """
        comment_dates = [
            str_to_datetime(comment['created_at'])
            for comment in item['comments_data']
            if item['user']['login'] != comment['user']['login']
        ]
        if comment_dates:
            return min(comment_dates)
        return None
Example #10
    def get_time_first_review(self, review):
        """Get the first date at which a review was made on the changeset by someone
        other than the user who created the changeset
        """
        changeset_owner = review.get('owner', None)
        changeset_owner_username = changeset_owner.get('username', None) if changeset_owner else None
        changeset_owner_email = changeset_owner.get('email', None) if changeset_owner else None
        changeset_created_on = str_to_datetime(review['createdOn']).isoformat()

        first_review = None

        patchsets = review.get('patchSets', [])
        for patchset in patchsets:

            approvals = patchset.get('approvals', [])
            for approval in approvals:

                if approval['type'] != CODE_REVIEW_TYPE:
                    continue

                approval_granted_on = str_to_datetime(approval['grantedOn']).isoformat()
                if approval_granted_on < changeset_created_on:
                    continue

                approval_by = approval.get('by', None)
                approval_by_username = approval_by.get('username', None) if approval_by else None
                approval_by_email = approval_by.get('email', None) if approval_by else None

                if approval_by_username and changeset_owner_username:
                    first_review = approval['grantedOn'] if approval_by_username != changeset_owner_username else None
                elif approval_by_email and changeset_owner_email:
                    first_review = approval['grantedOn'] if approval_by_email != changeset_owner_email else None
                else:
                    # if changeset_owner or approval_by is None
                    first_review = approval['grantedOn']

                if first_review:
                    return first_review

        return first_review
Example #11
    def fetch_items(self, category, **kwargs):
        """Fetch the tweets

        :param category: the category of items to fetch
        :param kwargs: backend arguments

        :returns: a generator of items
        """
        since_id = kwargs['since_id']
        max_id = kwargs['max_id']
        geocode = kwargs['geocode']
        lang = kwargs['lang']
        entities = kwargs['include_entities']
        tweets_type = kwargs['result_type']

        logger.info("Fetching tweets %s from %s to %s",
                    self.query, str(since_id),
                    str(max_id) if max_id else '--')

        tweets_ids = []
        min_date = None
        max_date = None
        group_tweets = self.client.tweets(self.query, since_id=since_id, max_id=max_id, geocode=geocode,
                                          lang=lang, include_entities=entities, result_type=tweets_type)

        for tweets in group_tweets:
            for tweet in tweets:
                tweets_ids.append(tweet['id'])

                if tweets[-1] == tweet:
                    min_date = str_to_datetime(tweets[-1]['created_at'])

                if tweets[0] == tweet and not max_date:
                    max_date = str_to_datetime(tweets[0]['created_at'])

                yield tweet

        logger.info("Fetch process completed: %s (unique %s) tweets fetched, from %s to %s",
                    len(tweets_ids), len(list(set(tweets_ids))), min_date, max_date)
Example #12
    def test_archive(self):
        """Test whether a set of items is fetched from the archive"""

        manager = ArchiveManager(self.test_path)

        category = 'mock_item'
        args = {
            'origin': 'http://example.com/',
            'tag': 'test',
            'subtype': 'mocksubtype',
            'from-date': str_to_datetime('2015-01-01')
        }

        # First, fetch the items twice to check whether several archives
        # are used
        items = fetch(CommandBackend, args, category, manager=manager)
        items = [item for item in items]
        self.assertEqual(len(items), 5)

        items = fetch(CommandBackend, args, category, manager=manager)
        items = [item for item in items]
        self.assertEqual(len(items), 5)

        # Fetch items from the archive
        items = fetch_from_archive(CommandBackend, args, manager, category,
                                   str_to_datetime('1970-01-01'))
        items = [item for item in items]

        self.assertEqual(len(items), 10)

        for x in range(2):
            for y in range(5):
                item = items[y + (x * 5)]
                expected_uuid = uuid('http://example.com/', str(y))

                self.assertEqual(item['data']['item'], y)
                self.assertEqual(item['data']['archive'], True)
                self.assertEqual(item['origin'], 'http://example.com/')
                self.assertEqual(item['uuid'], expected_uuid)
                self.assertEqual(item['tag'], 'test')
Example #13
    def archived_after(self, value):
        if value is None:
            self._archived_after = None
        elif isinstance(value, datetime.datetime):
            self._archived_after = datetime_to_utc(value)
        elif isinstance(value, str):
            try:
                self._archived_after = str_to_datetime(value)
            except InvalidDateError as e:
                raise ValueError("'archived_after' is invalid; %s" % str(e))
        else:
            raise ValueError("'archived_after' must be either a str or a datetime; %s given"
                             % str(type(value)))
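This snippet reads like the body of a property setter; a self-contained sketch of the same validation pattern, with a hypothetical holder class (datetime_to_utc, str_to_datetime and InvalidDateError live in grimoirelab-toolkit):

import datetime

from grimoirelab_toolkit.datetime import (InvalidDateError,
                                          datetime_to_utc,
                                          str_to_datetime)


class ArchiveQuery:
    """Hypothetical holder for the 'archived_after' filter"""

    def __init__(self, archived_after=None):
        self.archived_after = archived_after

    @property
    def archived_after(self):
        return self._archived_after

    @archived_after.setter
    def archived_after(self, value):
        if value is None:
            self._archived_after = None
        elif isinstance(value, datetime.datetime):
            self._archived_after = datetime_to_utc(value)
        elif isinstance(value, str):
            try:
                self._archived_after = str_to_datetime(value)
            except InvalidDateError as e:
                raise ValueError("'archived_after' is invalid; %s" % str(e))
        else:
            raise ValueError("'archived_after' must be either a str or a datetime")


query = ArchiveQuery(archived_after='2020-01-01')
print(query.archived_after)  # timezone-aware datetime for 2020-01-01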
Example #14
    def test_remove_archive_on_error(self):
        """Test whether an archive is removed when an unhandled exception occurs"""

        manager = ArchiveManager(self.test_path)

        category = 'mock_item'
        args = {
            'origin': 'http://example.com/',
            'tag': 'test',
            'subtype': 'mocksubtype',
            'from-date': str_to_datetime('2015-01-01')
        }

        items = fetch(ErrorCommandBackend, args, category, manager=manager)

        with self.assertRaises(BackendError):
            _ = [item for item in items]

        filepaths = manager.search('http://example.com/', 'ErrorCommandBackend',
                                   'mock_item', str_to_datetime('1970-01-01'))

        self.assertEqual(len(filepaths), 0)
Example #15
    def metadata_updated_on(item):
        """Extracts the update time from a MozillaClub item.

        The timestamp is extracted from 'updated' field.
        This date is in ISO format and it needs to be converted to
        a float value.

        :param item: item generated by the backend

        :returns: a UNIX timestamp
        """
        date = str_to_datetime(item['updated'])
        return float(date.timestamp())
Example #16
    def test_no_archived_items(self):
        """Test when no archived items are available"""

        manager = ArchiveManager(self.test_path)

        category = 'mock_item'
        args = {
            'origin': 'http://example.com/',
            'tag': 'test',
            'subtype': 'mocksubtype',
            'from-date': str_to_datetime('2015-01-01')
        }

        items = fetch(CommandBackend, args, category, manager=manager)
        items = [item for item in items]
        self.assertEqual(len(items), 5)

        # There aren't items for this category
        items = fetch_from_archive(CommandBackend, args, manager, 'alt_item',
                                   str_to_datetime('1970-01-01'))
        items = [item for item in items]
        self.assertEqual(len(items), 0)
Example #17
    def __get_next_event(self, event_fields):
        # Fill the empty event with all fields as None
        event = {key: None for key in event_fields.values()}
        event['updated'] = DEFAULT_DATETIME.isoformat()

        last_col = 0
        while self.ncell < len(self.cells):
            # Get all cols (cells) for the event (row)
            cell = self.cells[self.ncell]
            ncol = int(cell['gs$cell']['col'])
            if ncol <= last_col:
                # new event (row) detected: new cell column lower than last
                break
            event[event_fields[ncol]] = cell['content']['$t']
            # Add an extra column with the update datetime
            cell_update = str_to_datetime(cell['updated']['$t'])
            if cell_update > str_to_datetime(event['updated']):
                event['updated'] = cell['updated']['$t']
            last_col = ncol
            self.ncell += 1

        return event
Example #18
def get_ocean_backend(backend_cmd,
                      enrich_backend,
                      no_incremental,
                      filter_raw=None):
    """ Get the ocean backend configured to start from the last enriched date """

    if no_incremental:
        last_enrich = None
    else:
        last_enrich = get_last_enrich(backend_cmd,
                                      enrich_backend,
                                      filter_raw=filter_raw)

    logger.debug("Last enrichment: {}".format(last_enrich))

    backend = None

    connector = get_connectors()[enrich_backend.get_connector_name()]

    if backend_cmd:
        backend_cmd = init_backend(backend_cmd)
        backend = backend_cmd.backend

        signature = inspect.signature(backend.fetch)
        if 'from_date' in signature.parameters:
            ocean_backend = connector[1](backend, from_date=last_enrich)
        elif 'offset' in signature.parameters:
            ocean_backend = connector[1](backend, offset=last_enrich)
        else:
            if last_enrich:
                ocean_backend = connector[1](backend, from_date=last_enrich)
            else:
                ocean_backend = connector[1](backend)
    else:
        # We can also have params for non-Perceval backends
        params = enrich_backend.backend_params
        if params:
            try:
                date_pos = params.index('--from-date')
                last_enrich = str_to_datetime(params[date_pos + 1])
            except ValueError:
                pass
        if last_enrich:
            ocean_backend = connector[1](backend, from_date=last_enrich)
        else:
            ocean_backend = connector[1](backend)

    if filter_raw:
        ocean_backend.set_filter_raw(filter_raw)

    return ocean_backend
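The signature inspection above (checking whether a backend's fetch accepts from_date or offset) is a general feature-detection pattern; a stdlib-only sketch with invented function names:

import inspect


def fetch_by_date(from_date=None):
    return 'date-based fetch since %s' % from_date


def fetch_by_offset(offset=0):
    return 'offset-based fetch from %s' % offset


for fn in (fetch_by_date, fetch_by_offset):
    params = inspect.signature(fn).parameters
    if 'from_date' in params:
        print(fn.__name__, '-> resume by date')
    elif 'offset' in params:
        print(fn.__name__, '-> resume by offset')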
Example #19
    def parse(self, *args):
        """Parse a list of arguments.

        Parse argument strings needed to run a backend command. The result
        will be a `argparse.Namespace` object populated with the values
        obtained after the validation of the parameters.

        :param args: argument strings

        :returns: an object with the parsed values
        """
        parsed_args = self.parser.parse_args(args)

        # Category was not set, remove it
        if parsed_args.category is None:
            delattr(parsed_args, 'category')

        if self._from_date:
            parsed_args.from_date = str_to_datetime(parsed_args.from_date)
        if self._to_date and parsed_args.to_date:
            parsed_args.to_date = str_to_datetime(parsed_args.to_date)
        if self._archive and parsed_args.archived_since:
            parsed_args.archived_since = str_to_datetime(
                parsed_args.archived_since)

        if self._archive and parsed_args.fetch_archive and parsed_args.no_archive:
            raise AttributeError(
                "fetch-archive and no-archive arguments are not compatible")
        if self._archive and parsed_args.fetch_archive and not parsed_args.category:
            raise AttributeError("fetch-archive needs a category to work with")

        # Set aliases
        for alias, arg in self.aliases.items():
            if (alias not in parsed_args) and (arg in parsed_args):
                value = getattr(parsed_args, arg, None)
                setattr(parsed_args, alias, value)

        return parsed_args
Example #20
    def metadata_updated_on(item):
        """Extracts and coverts the update time from a Confluence item.

        The timestamp is extracted from 'when' field on 'version' section.
        This date is converted to UNIX timestamp format.

        :param item: item generated by the backend

        :returns: a UNIX timestamp
        """
        ts = item['version']['when']
        ts = str_to_datetime(ts)

        return ts.timestamp()
Example #21
    def parse(self, *args):
        """Parse a list of arguments.

        Parse argument strings needed to run a backend command. The result
        will be a `argparse.Namespace` object populated with the values
        obtained after the validation of the parameters.

        :param args: argument strings

        :returns: an object with the parsed values
        """
        parsed_args = self.parser.parse_args(args)

        # Category was not set, remove it
        if parsed_args.category is None:
            delattr(parsed_args, 'category')

        if self._from_date:
            parsed_args.from_date = str_to_datetime(parsed_args.from_date)
        if self._to_date and parsed_args.to_date:
            parsed_args.to_date = str_to_datetime(parsed_args.to_date)

        return parsed_args
Example #22
    def metadata_updated_on(item):
        """Extracts the update time from a RSS item.

        The timestamp is extracted from 'published' field.
        This date is a datetime string that needs to be converted to
        a UNIX timestamp float value.

        :param item: item generated by the backend

        :returns: a UNIX timestamp
        """
        ts = str_to_datetime(item['published'])

        return ts.timestamp()
Example #23
    def metadata_updated_on(item):
        """Extracts and coverts the sent time of a message
        from a Gitter item.

        The timestamp is extracted from 'sent' field and
        converted to a UNIX timestamp.

        :param item: item generated by the backend

        :returns: a UNIX timestamp
        """
        ts = str_to_datetime(item['sent'])

        return ts.timestamp()
Example #24
    def metadata_updated_on(item):
        """Extracts and coverts the update time from a Redmine item.

        The timestamp is extracted from 'updated_on' field and converted
        to a UNIX timestamp.

        :param item: item generated by the backend

        :returns: a UNIX timestamp
        """
        ts = item['updated_on']
        ts = str_to_datetime(ts)

        return ts.timestamp()
Example #25
    def __add_commit_info(self, df_columns, item):

        commit_data = item["data"]
        repository = item["origin"]

        creation_date = str_to_datetime(commit_data['AuthorDate'])

        df_columns[Git.COMMIT_HASH].append(commit_data['commit'])

        df_columns[Git.COMMIT_ID].append(commit_data['commit'])
        df_columns[Git.COMMIT_EVENT].append(Git.EVENT_COMMIT)
        df_columns[Git.COMMIT_DATE].append(creation_date)
        df_columns[Git.COMMIT_OWNER].append(commit_data['Author'])
        df_columns[Git.COMMIT_COMMITTER].append(commit_data['Commit'])
        df_columns[Git.COMMIT_COMMITTER_DATE].append(str_to_datetime(commit_data['CommitDate']))
        df_columns[Git.COMMIT_REPOSITORY].append(repository)
        if 'message' in commit_data.keys():
            df_columns[Git.COMMIT_MESSAGE].append(commit_data['message'])
        else:
            df_columns[Git.COMMIT_MESSAGE].append('')

        author_domain = self.enrich.get_identity_domain(self.enrich.get_sh_identity(item, 'Author'))
        df_columns[Git.AUTHOR_DOMAIN].append(author_domain)
Example #26
    def get_time_first_review_patchset(self, patchset):
        """Get the first date at which a review was made on the patchset by someone
        other than the user who created the patchset
        """
        patchset_author = patchset.get('author', None)
        patchset_author_username = patchset_author.get('username', None) if patchset_author else None
        patchset_author_email = patchset_author.get('email', None) if patchset_author else None
        patchset_created_on = str_to_datetime(patchset['createdOn']).isoformat()

        first_review = None

        approvals = patchset.get('approvals', [])
        for approval in approvals:

            if approval['type'] != CODE_REVIEW_TYPE:
                continue

            approval_granted_on = str_to_datetime(approval['grantedOn']).isoformat()
            if approval_granted_on < patchset_created_on:
                continue

            approval_by = approval.get('by', None)
            approval_by_username = approval_by.get('username', None) if approval_by else None
            approval_by_email = approval_by.get('email', None) if approval_by else None

            if approval_by_username and patchset_author_username:
                first_review = approval['grantedOn'] if approval_by_username != patchset_author_username else None
            elif approval_by_email and patchset_author_email:
                first_review = approval['grantedOn'] if approval_by_email != patchset_author_email else None
            else:
                # if patchset_author or approval_by is None
                first_review = approval['grantedOn']

            if first_review:
                break

        return first_review
Example #27
    def get_rich_comment(self, item, answer, comment):
        ecomment = self.get_rich_item(item)  # reuse all fields from item
        ecomment['id'] = str(ecomment['id']) + '_' + str(answer['id']) + '_' + str(comment['id'])
        ecomment['url'] = item['data']['url'] + "/?answer="
        ecomment['url'] += str(answer['id']) + '#post-id-' + str(answer['id'])
        if 'author' in comment:
            # Not sure if this format is present in some version of askbot
            ecomment['author_askbot_user_name'] = comment['author']['username']
            ecomment['author_askbot_id'] = str(comment['author']['id'])
            ecomment['author_url'] = ecomment['origin'] + '/users/'
            ecomment['author_url'] += str(comment['author']['id']) + '/' + comment['author']['username']

        elif 'user_display_name' in comment:
            ecomment['author_askbot_user_name'] = comment['user_display_name']
            ecomment['author_askbot_id'] = str(comment['user_id'])
        if 'summary' in comment:
            ecomment['summary'] = comment['summary']
        ecomment['score'] = int(comment['score']) if comment['score'] else 0

        dfield = 'added_at'
        if 'comment_added_at' in comment:
            dfield = 'comment_added_at'

        if self.sortinghat:
            if dfield == 'added_at':
                comment['added_at_date'] = unixtime_to_datetime(float(comment[dfield])).isoformat()
            else:
                comment['added_at_date'] = comment[dfield]
            ecomment.update(self.get_item_sh(comment, date_field="added_at_date"))
            if ecomment['author_user_name'] != ecomment['author_askbot_user_name']:
                logger.warning('Bad SH identity in askbot comment. Found %s expecting %s',
                               ecomment['author_user_name'], ecomment['author_askbot_user_name'])

        if dfield == 'added_at':
            comment_at = unixtime_to_datetime(float(comment[dfield]))
        else:
            comment_at = str_to_datetime(comment[dfield])

        added_at = unixtime_to_datetime(float(item['data']["added_at"]))
        ecomment['time_from_question'] = get_time_diff_days(added_at, comment_at)
        ecomment['type'] = 'comment'
        ecomment.update(self.get_grimoire_fields(comment_at.isoformat(), ecomment['type']))

        # Clean items fields not valid in comments
        for f in ['is_askbot_question', 'author_reputation', 'author_badges', 'is_correct', 'comment_count']:
            if f in ecomment:
                ecomment.pop(f)

        return ecomment
Example #28
    def _fetch_and_parse_messages(self, mailing_list, from_date):
        """Fetch and parse the messages from a mailing list"""

        from_date = datetime_to_utc(from_date)

        nmsgs, imsgs, tmsgs = (0, 0, 0)

        for mbox in mailing_list.mboxes:
            tmp_path = None

            try:
                tmp_path = self._copy_mbox(mbox)

                for message in self.parse_mbox(tmp_path):
                    tmsgs += 1

                    if not self._validate_message(message):
                        imsgs += 1
                        continue

                    # Ignore those messages sent before the given date
                    dt = str_to_datetime(message[MBox.DATE_FIELD])

                    if dt < from_date:
                        logger.debug("Message %s sent before %s; skipped",
                                     message['unixfrom'], str(from_date))
                        tmsgs -= 1
                        continue

                    # Convert 'CaseInsensitiveDict' to dict
                    message = self._casedict_to_dict(message)

                    nmsgs += 1
                    logger.debug("Message %s parsed", message['unixfrom'])

                    yield message
            except (OSError, EOFError) as e:
                logger.warning("Ignoring %s mbox due to: %s", mbox.filepath,
                               str(e))
            except Exception as e:
                if tmp_path and os.path.exists(tmp_path):
                    os.remove(tmp_path)
                raise e
            finally:
                if tmp_path and os.path.exists(tmp_path):
                    os.remove(tmp_path)

        logger.info("Done. %s/%s messages fetched; %s ignored", nmsgs, tmsgs,
                    imsgs)
Example #29
    def metadata_updated_on(item):
        """Extracts the update time from a GitLab item.

        The timestamp used is extracted from 'updated_at' field.
        This date is converted to UNIX timestamp format. As GitLab
        dates are in UTC the conversion is straightforward.

        :param item: item generated by the backend

        :returns: a UNIX timestamp
        """
        ts = item['updated_at']
        ts = str_to_datetime(ts)

        return ts.timestamp()
Example #30
    def metadata_updated_on(item):
        """Extracts the update time from a Discourse item.

        The timestamp used is extracted from 'last_posted_at' field.
        This date is converted to UNIX timestamp format taking into
        account the timezone of the date.

        :param item: item generated by the backend

        :returns: a UNIX timestamp
        """
        ts = item['last_posted_at']
        ts = str_to_datetime(ts)

        return ts.timestamp()
Example #31
    def __fetch_buglist(self, from_date):
        buglist = self.__fetch_and_parse_buglist_page(from_date)

        while buglist:
            bug = buglist.pop(0)
            last_date = bug['changeddate']
            yield bug

            # Bugzilla does not support pagination. Due to this,
            # the next list of bugs is requested adding one second
            # to the last date obtained.
            if not buglist:
                from_date = str_to_datetime(last_date)
                from_date += datetime.timedelta(seconds=1)
                buglist = self.__fetch_and_parse_buglist_page(from_date)
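A short sketch of the one-second bump used above to emulate pagination, assuming str_to_datetime from grimoirelab-toolkit (which returns timezone-aware dates, defaulting to UTC) and an invented 'changeddate' value:

import datetime

from grimoirelab_toolkit.datetime import str_to_datetime

last_date = '2018-03-14 17:45:00'
from_date = str_to_datetime(last_date) + datetime.timedelta(seconds=1)
print(from_date.isoformat())  # 2018-03-14T17:45:01+00:00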
Example #32
    def _add_metadata(self, df_columns, item):
        metadata__timestamp = item["metadata__timestamp"]
        metadata__updated_on = item["metadata__updated_on"]
        metadata__enriched_on = dt.utcnow().isoformat()

        df_columns[Events.META_TIMESTAMP].append(metadata__timestamp)
        df_columns[Events.META_UPDATED_ON].append(metadata__updated_on)
        df_columns[Events.META_ENRICHED_ON].append(metadata__enriched_on)

        # If called after '__add_sh_info', item will already contain
        # 'grimoire_creation_date'

        if Events.GRIMOIRE_CREATION_DATE in item:
            creation_date = item[Events.GRIMOIRE_CREATION_DATE]
        else:
            creation_date = str_to_datetime(item['data']['AuthorDate'])

        df_columns[Events.GRIMOIRE_CREATION_DATE].append(creation_date)

        # Perceval fields
        df_columns[Events.PERCEVAL_UUID].append(item['uuid'])
Example #33
    def eventize(self, granularity):
        """ This splits the JSON information found at self.events into the
        several events. For this there are three different levels of time
        consuming actions: 1-soft, 2-medium and 3-hard.

        Level 1 provides events about emails
        Level 2 not implemented
        Level 3 not implemented

        :param granularity: Levels of time consuming actions to calculate events
        :type granularity: integer

        :returns: Pandas dataframe with splitted events.
        :rtype: pandas.DataFrame
        """

        email = {}
        # First level granularity
        email[Email.EMAIL_ID] = []
        email[Email.EMAIL_EVENT] = []
        email[Email.EMAIL_DATE] = []
        email[Email.EMAIL_OWNER] = []
        email[Email.EMAIL_SUBJECT] = []
        email[Email.EMAIL_BODY] = []
        email[Email.EMAIL_ORIGIN] = []

        events = pandas.DataFrame()

        for item in self.items:
            origin = item["origin"]
            email_data = item["data"]
            if granularity == 1:
                # Changeset submission date: filling a new event
                email[Email.EMAIL_ID].append(email_data["Message-ID"])
                email[Email.EMAIL_EVENT].append(Email.EVENT_OPEN)
                try:
                    email[Email.EMAIL_DATE].append(str_to_datetime(email_data["Date"], ignoretz=True))
                except KeyError:
                    email[Email.EMAIL_DATE].append(str_to_datetime("1970-01-01"))
                email[Email.EMAIL_OWNER].append(email_data["From"])
                email[Email.EMAIL_SUBJECT].append(email_data["Subject"])
                try:
                    email[Email.EMAIL_BODY].append(email_data["body"]["plain"])
                except KeyError:
                    email[Email.EMAIL_BODY].append("None")
                email[Email.EMAIL_ORIGIN].append(origin)

            if granularity == 2:
                # TBD
                pass

            if granularity == 3:
                # TBD
                pass

        # Done in this way to have an order (and not a direct cast)
        events[Email.EMAIL_ID] = email[Email.EMAIL_ID]
        events[Email.EMAIL_EVENT] = email[Email.EMAIL_EVENT]
        events[Email.EMAIL_DATE] = email[Email.EMAIL_DATE]
        events[Email.EMAIL_OWNER] = email[Email.EMAIL_OWNER]
        events[Email.EMAIL_SUBJECT] = email[Email.EMAIL_SUBJECT]
        events[Email.EMAIL_BODY] = email[Email.EMAIL_BODY]
        events[Email.EMAIL_ORIGIN] = email[Email.EMAIL_ORIGIN]

        return events
Example #34
    def eventize(self, granularity):
        """ This splits the JSON information found at self.events into the
        several events. For this there are three different levels of time
        consuming actions: 1-soft, 2-medium and 3-hard.

        Level 1 provides events about open and closed issues.
        Level 2 provides events about the rest of the status updates.
        Level 3 provides events about the rest of the values in any of the
        fields.

        :param granularity: Levels of time consuming actions to calculate events
        :type granularity: integer

        :returns: Pandas dataframe with splitted events.
        :rtype: pandas.DataFrame
        """

        issue = {}
        issue[BugzillaRest.ISSUE_ID] = []
        issue[BugzillaRest.ISSUE_EVENT] = []
        issue[BugzillaRest.ISSUE_DATE] = []
        issue[BugzillaRest.ISSUE_OWNER] = []
        issue[BugzillaRest.ISSUE_ADDED] = []
        issue[BugzillaRest.ISSUE_REMOVED] = []

        events = pandas.DataFrame()

        for item in self.items:
            bug_data = item["data"]
            if granularity == 1:
                # Open Date: filling a new event
                issue[BugzillaRest.ISSUE_ID].append(bug_data['id'])
                issue[BugzillaRest.ISSUE_EVENT].append(BugzillaRest.EVENT_OPEN)
                issue[BugzillaRest.ISSUE_DATE].append(str_to_datetime(bug_data['creation_time']))
                issue[BugzillaRest.ISSUE_OWNER].append(bug_data['creator_detail']["real_name"])
                issue[BugzillaRest.ISSUE_ADDED].append("-")
                issue[BugzillaRest.ISSUE_REMOVED].append("-")

                # Adding the rest of the status updates (if there were any)
                if 'history' in bug_data.keys():
                    history = bug_data["history"]
                    for step in history:
                        # Filling a new event
                        who = step["who"]
                        when = str_to_datetime(step["when"])
                        changes = step["changes"]
                        for change in changes:
                            issue[BugzillaRest.ISSUE_ID].append(bug_data['id'])
                            issue[BugzillaRest.ISSUE_EVENT].append("ISSUE_" + change["field_name"])
                            issue[BugzillaRest.ISSUE_ADDED].append(change["added"])
                            issue[BugzillaRest.ISSUE_REMOVED].append(change["removed"])
                            issue[BugzillaRest.ISSUE_DATE].append(when)
                            issue[BugzillaRest.ISSUE_OWNER].append(who)

            if granularity == 2:
                # TBD: produce an index with all of the changes,
                #    keeping in mind the changes made when the
                #    ticket was created
                pass

            if granularity == 3:
                # TBD
                pass

        # Done in this way to have an order (and not a direct cast)
        events[BugzillaRest.ISSUE_ID] = issue[BugzillaRest.ISSUE_ID]
        events[BugzillaRest.ISSUE_EVENT] = issue[BugzillaRest.ISSUE_EVENT]
        events[BugzillaRest.ISSUE_DATE] = issue[BugzillaRest.ISSUE_DATE]
        events[BugzillaRest.ISSUE_OWNER] = issue[BugzillaRest.ISSUE_OWNER]
        events[BugzillaRest.ISSUE_ADDED] = issue[BugzillaRest.ISSUE_ADDED]
        events[BugzillaRest.ISSUE_REMOVED] = issue[BugzillaRest.ISSUE_REMOVED]

        return events
Example #35
    def eventize(self, granularity):
        """ This splits the JSON information found at self.events into the
        several events. For this there are three different levels of time
        consuming actions: 1-soft, 2-medium and 3-hard.

        Level 1 provides events about open and closed issues.
        Level 2 provides events about the rest of the status updates.
        Level 3 provides events about the rest of the values in any of the
        fields.

        :param granularity: Levels of time consuming actions to calculate events
        :type granularity: integer

        :returns: Pandas dataframe with splitted events.
        :rtype: pandas.DataFrame
        """

        issue = {}
        issue[Bugzilla.ISSUE_ID] = []
        issue[Bugzilla.ISSUE_EVENT] = []
        issue[Bugzilla.ISSUE_DATE] = []
        issue[Bugzilla.ISSUE_OWNER] = []

        events = pandas.DataFrame()

        for item in self.items:
            bug_data = item["data"]
            if granularity == 1:
                # Open Date: filling a new event
                issue[Bugzilla.ISSUE_ID].append(bug_data['bug_id'][0]['__text__'])
                issue[Bugzilla.ISSUE_EVENT].append(Bugzilla.EVENT_OPEN)
                issue[Bugzilla.ISSUE_DATE].append(str_to_datetime(bug_data['creation_ts'][0]['__text__']))
                issue[Bugzilla.ISSUE_OWNER].append(bug_data['reporter'][0]["__text__"])

                # Adding the rest of the status updates (if there were any)
                if 'activity' in bug_data.keys():
                    activity = bug_data["activity"]
                    for change in activity:
                        # if change["What"] == "Status":
                        # Filling a new event
                        issue[Bugzilla.ISSUE_ID].append(bug_data['bug_id'][0]['__text__'])
                        issue[Bugzilla.ISSUE_EVENT].append("ISSUE_" + change["Added"])
                        issue[Bugzilla.ISSUE_DATE].append(str_to_datetime(change["When"]))
                        issue[Bugzilla.ISSUE_OWNER].append(change["Who"])

            if granularity == 2:
                # TBD: produce an index with all of the changes,
                #    keeping in mind the changes made when the
                #    ticket was created
                pass

            if granularity == 3:
                # TBD
                pass

        # Done in this way to have an order (and not a direct cast)
        events[Bugzilla.ISSUE_ID] = issue[Bugzilla.ISSUE_ID]
        events[Bugzilla.ISSUE_EVENT] = issue[Bugzilla.ISSUE_EVENT]
        events[Bugzilla.ISSUE_DATE] = issue[Bugzilla.ISSUE_DATE]
        events[Bugzilla.ISSUE_OWNER] = issue[Bugzilla.ISSUE_OWNER]

        return events