Example #1
    def _dump_cache(self):
        if any(x for x in self.history
               if not isinstance(x['created_at'], datetime.datetime)):
            logging.error(self.history)
            raise AssertionError(
                u'found a non-datetime created_at in events data')

        if not os.path.isdir(self.cachedir):
            os.makedirs(self.cachedir)

        # keep the timestamp
        cachedata = {
            u'version': self.SCHEMA_VERSION,
            u'updated_at': self.issue.instance.updated_at,
            u'history': self.history
        }

        try:
            with open(self.cachefile, 'wb') as f:
                pickle_dump(cachedata, f)
        except Exception as e:
            logging.error(e)
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb
                epdb.st()
            else:
                raise Exception(u'failed to dump cache to %s' % self.cachefile)
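
All of these examples call pickle_dump and pickle_load helpers instead of the standard library functions directly. A minimal sketch of what such a wrapper could look like, assuming its only job is to pin a protocol readable by both Python 2 and Python 3 (the names and layout here are assumptions, not the project's actual implementation):

    import pickle

    def pickle_dump(data, fileobj):
        # pin protocol 2 so the cache stays readable across Python 2 and 3
        pickle.dump(data, fileobj, protocol=2)

    def pickle_load(fileobj, **kwargs):
        # kwargs lets callers pass encoding='bytes' when loading
        # Python 2 pickles under Python 3, as some later examples do
        return pickle.load(fileobj, **kwargs)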
Example #2
 def save_pullrequest(self, issue):
     cfile = os.path.join(self.cachedir, u'issues', to_text(issue.number),
                          u'pullrequest.pickle')
     cdir = os.path.dirname(cfile)
     if not os.path.isdir(cdir):
         os.makedirs(cdir)
     with open(cfile, 'wb') as f:
         pickle_dump(issue, f)
Example #3
 def save_issue(self, issue):
     cfile = os.path.join(self.cachedir, u'issues', to_text(issue.number),
                          u'issue.pickle')
     cdir = os.path.dirname(cfile)
     if not os.path.isdir(cdir):
         os.makedirs(cdir)
     logging.debug(u'dump %s' % cfile)
     with open(cfile, 'wb') as f:
         pickle_dump(issue, f)
Example #4
    def pullrequest_filepath_exists(self, filepath):
        ''' Check if a file exists on the submitter's branch '''

        # https://github.com/ansible/ansibullbot/issues/406

        # https://developer.github.com/v3/repos/contents/
        #   GET /repos/:owner/:repo/readme
        # "contents_url":
        # "https://api.github.com/repos/ganeshrn/ansible/contents/{+path}",

        # self.pullrequest.head
        #   - ref --> branch name
        #   - repo.full_name

        sha = self.pullrequest.head.sha
        pdata = None
        resp = None
        cachefile = os.path.join(self.cachedir, u'issues',
                                 to_text(self.number), u'shippable_yml.pickle')

        try:
            if os.path.isfile(cachefile):
                with open(cachefile, 'rb') as f:
                    pdata = pickle_load(f)
        except Exception as e:
            logging.error(u'failed to unpickle %s %s' %
                          (cachefile, to_text(e)))

        if not pdata or pdata[0] != sha:

            if self.pullrequest.head.repo:

                url = u'https://api.github.com/repos/'
                url += self.pullrequest.head.repo.full_name
                url += u'/contents/'
                url += filepath

                resp = self.pullrequest._requester.requestJson(
                    u"GET", url, input={u'ref': self.pullrequest.head.ref})

            else:
                # https://github.com/ansible/ansible/pull/19891
                # Sometimes the head repo/branch has disappeared
                resp = [None]

            pdata = [sha, resp]
            with open(cachefile, 'wb') as f:
                pickle_dump(pdata, f)

        else:
            resp = pdata[1]

        result = False
        if resp[0]:
            result = True
        return result
Example #5
    def load_update_fetch(self, property_name):
        '''Fetch a get() property for an object'''

        edata = None
        events = []
        updated = None
        update = False
        write_cache = False
        self.repo.update()

        pfile = os.path.join(self.cachedir, u'%s.pickle' % property_name)
        pdir = os.path.dirname(pfile)

        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            try:
                with open(pfile, 'rb') as f:
                    edata = pickle_load(f)
            except Exception as e:
                update = True
                write_cache = True

            # check the timestamp on the cache
            if edata:
                updated = edata[0]
                events = edata[1]
                if updated < self.repo.updated_at:
                    update = True
                    write_cache = True

        # pull all events if timestamp is behind or no events cached
        if update or not events:
            write_cache = True
            updated = self.get_current_time()
            try:
                methodToCall = getattr(self.repo, u'get_' + property_name)
            except Exception as e:
                logging.error(e)
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb
                    epdb.st()
                else:
                    raise Exception(u'unable to get %s' % property_name)
            events = [x for x in methodToCall()]

        if C.DEFAULT_PICKLE_ISSUES:
            if write_cache or not os.path.isfile(pfile):
                # need to dump the pickle back to disk
                edata = [updated, events]
                with open(pfile, 'wb') as f:
                    pickle_dump(edata, f)

        return events
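
Most of these snippets share the same timestamp-guarded cache pattern: load an (updated_at, payload) pair from a pickle, compare the stored timestamp with the upstream object's updated_at, and re-fetch and re-write the pickle when the cache is stale or unreadable. A condensed sketch of that pattern, using hypothetical names (cache_path, fetch, source_updated_at) in place of the project-specific pieces:

    import os
    import pickle
    import datetime

    def cached_fetch(cache_path, fetch, source_updated_at):
        # try the cache first; any unpickling error simply forces a re-fetch
        if os.path.isfile(cache_path):
            try:
                with open(cache_path, 'rb') as f:
                    cached_at, payload = pickle.load(f)
                if cached_at >= source_updated_at:
                    return payload
            except Exception:
                pass

        # stale or missing: fetch fresh data and rewrite the cache
        payload = fetch()
        with open(cache_path, 'wb') as f:
            pickle.dump((datetime.datetime.utcnow(), payload), f)
        return payload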
Example #6
    def pullrequest_filepath_exists(self, filepath):
        ''' Check if a file exists on the submitter's branch '''

        # https://github.com/ansible/ansibullbot/issues/406

        # https://developer.github.com/v3/repos/contents/
        #   GET /repos/:owner/:repo/readme
        # "contents_url":
        # "https://api.github.com/repos/ganeshrn/ansible/contents/{+path}",

        # self.pullrequest.head
        #   - ref --> branch name
        #   - repo.full_name

        sha = self.pullrequest.head.sha
        pdata = None
        resp = None
        cachefile = os.path.join(
            self.cachedir,
            u'issues',
            to_text(self.number),
            u'shippable_yml.pickle'
        )

        try:
            if os.path.isfile(cachefile):
                with open(cachefile, 'rb') as f:
                    pdata = pickle_load(f)
        except Exception as e:
            logging.error(u'failed to unpickle %s %s' % (cachefile, to_text(e)))

        if not pdata or pdata[0] != sha:

            if self.pullrequest.head.repo:
                url = self.pullrequest.head.repo.url + u'/contents/' + filepath
                resp = self.pullrequest._requester.requestJson(
                    u"GET",
                    url,
                    input={u'ref': self.pullrequest.head.ref}
                )
            else:
                # https://github.com/ansible/ansible/pull/19891
                # Sometimes the head repo/branch has disappeared
                resp = [None]

            pdata = [sha, resp]
            with open(cachefile, 'wb') as f:
                pickle_dump(pdata, f)

        else:
            resp = pdata[1]

        result = False
        if resp[0]:
            result = True
        return result
Example #7
    def save_issue(self):
        pfile = os.path.join(self.cachedir, u'issues',
                             to_text(self.instance.number), u'issue.pickle')
        pdir = os.path.dirname(pfile)

        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        logging.debug(u'dump %s' % pfile)
        with open(pfile, 'wb') as f:
            pickle_dump(self.instance, f)
Example #8
    def load_update_fetch(self, property_name):
        '''Fetch a get() property for an object'''

        edata = None
        events = []
        updated = None
        update = False
        write_cache = False
        self.repo.update()

        pfile = os.path.join(self.cachedir, u'%s.pickle' % property_name)
        pdir = os.path.dirname(pfile)

        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            try:
                with open(pfile, 'rb') as f:
                    edata = pickle_load(f)
            except Exception as e:
                update = True
                write_cache = True

            # check the timestamp on the cache
            if edata:
                updated = edata[0]
                events = edata[1]
                if updated < self.repo.updated_at:
                    update = True
                    write_cache = True

        # pull all events if timestamp is behind or no events cached
        if update or not events:
            write_cache = True
            updated = self.get_current_time()
            try:
                methodToCall = getattr(self.repo, u'get_' + property_name)
            except Exception as e:
                logging.error(e)
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb; epdb.st()
                else:
                    raise Exception(u'unable to get %s' % property_name)
            events = [x for x in methodToCall()]

        if write_cache or not os.path.isfile(pfile):
            # need to dump the pickle back to disk
            edata = [updated, events]
            with open(pfile, 'wb') as f:
                pickle_dump(edata, f)

        return events
Example #9
 def save_pullrequest(self, issue):
     cfile = os.path.join(
         self.cachedir,
         u'issues',
         to_text(issue.number),
         u'pullrequest.pickle'
     )
     cdir = os.path.dirname(cfile)
     if not os.path.isdir(cdir):
         os.makedirs(cdir)
     with open(cfile, 'wb') as f:
         pickle_dump(issue, f)
Example #10
    def get_pullrequest_status(self, force_fetch=False):
        fetched = False
        jdata = None
        pdata = None
        # pull out the status url from the raw data
        rd = self.pullrequest_raw_data
        surl = rd[u'statuses_url']

        pfile = os.path.join(self.full_cachedir, u'pr_status.pickle')
        pdir = os.path.dirname(pfile)
        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            logging.info(u'pullrequest_status load pfile')
            with open(pfile, 'rb') as f:
                pdata = pickle_load(f)

        if pdata:
            # is the data stale?
            if pdata[0] < self.pullrequest.updated_at or force_fetch:
                logging.info(u'fetching pr status: stale, previous from %s' %
                             pdata[0])
                jdata = self.github.get_request(surl)

                if isinstance(jdata, dict):
                    # https://github.com/ansible/ansibullbot/issues/959
                    logging.error(
                        u'Got the following error while fetching PR status: %s',
                        jdata.get(u'message'))
                    logging.error(jdata)
                    return []

                self.log_ci_status(jdata)
                fetched = True
            else:
                jdata = pdata[1]

        # missing?
        if not jdata:
            logging.info(u'fetching pr status: !data')
            jdata = self.github.get_request(surl)
            # FIXME? should we self.log_ci_status(jdata) here too?
            fetched = True

        if fetched or not os.path.isfile(pfile):
            logging.info(u'writing %s' % pfile)
            pdata = (self.pullrequest.updated_at, jdata)
            with open(pfile, 'wb') as f:
                pickle_dump(pdata, f)

        return jdata
Example #11
 def save_issue(self, issue):
     cfile = os.path.join(
         self.cachedir,
         u'issues',
         to_text(issue.number),
         u'issue.pickle'
     )
     cdir = os.path.dirname(cfile)
     if not os.path.isdir(cdir):
         os.makedirs(cdir)
     logging.debug(u'dump %s' % cfile)
     with open(cfile, 'wb') as f:
         pickle_dump(issue, f)
Example #12
    def save_issue(self):
        pfile = os.path.join(
            self.cachedir,
            u'issues',
            to_text(self.instance.number),
            u'issue.pickle'
        )
        pdir = os.path.dirname(pfile)

        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        logging.debug(u'dump %s' % pfile)
        with open(pfile, 'wb') as f:
            pickle_dump(self.instance, f)
Example #13
    def _dump_cache(self):
        if not os.path.isdir(self.cachedir):
            os.makedirs(self.cachedir)

        # keep the timestamp
        cachedata = {u'updated_at': self.issue.instance.updated_at,
                     u'history': self.history}

        try:
            with open(self.cachefile, 'wb') as f:
                pickle_dump(cachedata, f)
        except Exception as e:
            logging.error(e)
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb; epdb.st()
            else:
                raise Exception(u'failed to dump cache to %s' % self.cachefile)
Example #14
    def get_members(self, organization):
        """Get members of an organization

        Args:
            organization: name of the organization

        Returns:
            A list of GitHub logins belonging to the organization
        """
        members = []
        update = False
        write_cache = False
        now = self.get_current_time()
        gh_org = self._connect().get_organization(organization)

        cachedir = os.path.join(self.cachedir_base, organization)
        if not os.path.isdir(cachedir):
            os.makedirs(cachedir)

        cachefile = os.path.join(cachedir, 'members.pickle')

        if os.path.isfile(cachefile):
            with open(cachefile, 'rb') as f:
                mdata = pickle_load(f)
            members = mdata[1]
            if mdata[0] < gh_org.updated_at:
                update = True
                write_cache = True
        else:
            update = True
            write_cache = True

        if update:
            members = gh_org.get_members()
            members = [x.login for x in members]

        # save the data
        if write_cache:
            mdata = [now, members]
            with open(cachefile, 'wb') as f:
                pickle_dump(mdata, f)

        return members
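
The members cache written above is just a [timestamp, logins] list, so it can be inspected directly; a hypothetical snippet (the cache directory and organization name below are assumptions):

    import os
    import pickle

    cachefile = os.path.join('/tmp/ansibullbot/cache', 'ansible', 'members.pickle')
    if os.path.isfile(cachefile):
        with open(cachefile, 'rb') as f:
            cached_at, members = pickle.load(f)
        print('%d members cached at %s' % (len(members), cached_at))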
Example #15
    def jobs(self):
        if self._jobs is None:
            if self.build_id:
                if not os.path.isdir(self._cachedir):
                    os.makedirs(self._cachedir)
                cache_file = os.path.join(
                    self._cachedir, u'timeline_%s.pickle' % self.build_id)

                resp = fetch(TIMELINE_URL_FMT % self.build_id)
                if resp is None:
                    data = None
                    if os.path.isfile(cache_file):
                        logging.info(
                            u'timeline was probably removed, load it from cache'
                        )
                        with open(cache_file, 'rb') as f:
                            data = pickle_load(f)
                else:
                    data = resp.json()
                    data = (strip_time_safely(data['lastChangedOn']), data)
                    logging.info(u'writing %s' % cache_file)
                    with open(cache_file, 'wb') as f:
                        pickle_dump(data, f)

                if data is not None:
                    data = data[1]
                    self._jobs = [
                        r for r in data['records'] if r['type'] == 'Job'
                    ]
                    self._updated_at = strip_time_safely(
                        data['lastChangedOn'])  # FIXME
                    self._stages = [
                        r for r in data['records'] if r['type'] == 'Stage'
                    ]  # FIXME
                else:
                    self._jobs = []
                    self._updated_at = strip_time_safely('1970-01-01')
                    self._stages = []
            else:
                self._jobs = []
        return self._jobs
Example #16
    def get_artifact(self, name, url):
        if not os.path.isdir(self._cachedir):
            os.makedirs(self._cachedir)

        data = None
        cache_file = os.path.join(
            self._cachedir,
            u'%s_%s.pickle' % (name.replace(' ', '-'), self.build_id))
        if os.path.isfile(cache_file):
            logging.info(u'loading %s' % cache_file)
            with open(cache_file, 'rb') as f:
                data = pickle_load(f)

        if data is None or (data and data[0] < self.updated_at) or not data[1]:
            if data:
                logging.info(u'fetching artifacts: stale, previous from %s' %
                             data[0])
            else:
                logging.info(u'fetching artifacts: stale, no previous data')

            resp = fetch(url, stream=True)
            if resp is not None:
                with BytesIO() as buf:
                    for chunk in resp.iter_content(chunk_size=128):
                        buf.write(chunk)
                    artifact_zip = ZipFile(buf)

                    artifact_data = []
                    for fn in artifact_zip.namelist():
                        if 'ansible-test-' not in fn:
                            continue
                        with artifact_zip.open(fn) as f:
                            artifact_data.append(json.load(f))

                    data = (self.updated_at, artifact_data)
                    logging.info(u'writing %s' % cache_file)
                    with open(cache_file, 'wb') as f:
                        pickle_dump(data, f)
        if data:
            return data[1]
Example #17
    def artifacts(self):
        if self._artifacts is None:
            # FIXME deduplicate code
            if not os.path.isdir(self._cachedir):
                os.makedirs(self._cachedir)

            data = None
            cache_file = os.path.join(self._cachedir,
                                      u'artifacts_%s.pickle' % self.build_id)
            if os.path.isfile(cache_file):
                logging.info(u'load artifacts cache')
                with open(cache_file, 'rb') as f:
                    data = pickle_load(f)

            if data is None or (data
                                and data[0] < self.updated_at) or not data[1]:
                if data:
                    logging.info(
                        u'fetching artifacts: stale, previous from %s' %
                        data[0])
                else:
                    logging.info(
                        u'fetching artifacts: stale, no previous data')

                resp = fetch(ARTIFACTS_URL_FMT % self.build_id)
                if resp is not None:
                    data = [
                        a for a in resp.json()['value']
                        if a['name'].startswith('Bot')
                    ]
                    data = (self.updated_at, data)

                    logging.info(u'writing %s' % cache_file)
                    with open(cache_file, 'wb') as f:
                        pickle_dump(data, f)
            if data:
                self._artifacts = data[1]

        return self._artifacts
Example #18
    def _get_module_blames(self):
        ''' Scrape the blame page for each module and store it '''

        keys = sorted(self.modules.keys())

        # scrape the data
        for k in keys:

            cpath = os.path.join(self.gitrepo.checkoutdir, k)
            if not os.path.isfile(cpath):
                self.committers[k] = {}
                continue

            ghash = self.last_commit_for_file(k)
            pfile = os.path.join(self.scraper_cache,
                                 k.replace(u'/', u'_') + u'.blame.pickle')
            sargs = [u'ansible', u'ansible', u'devel', k]

            refresh = False
            if not os.path.isfile(pfile):
                refresh = True
            else:
                logging.debug(u'load {}'.format(pfile))
                with open(pfile, 'rb') as f:
                    pdata = pickle_load(f)
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb
                    epdb.st()
                if pdata[0] == ghash:
                    self.committers[k] = pdata[1]
                    if len(pdata) == 3:
                        # use emailmap if available
                        emailmap = pdata[2]
                    else:
                        emailmap = {}
                else:
                    refresh = True

            if refresh:
                if self.gqlc:
                    logging.debug(u'graphql blame usernames {}'.format(pfile))
                    uns, emailmap = self.gqlc.get_usernames_from_filename_blame(
                        *sargs)
                else:
                    emailmap = {}  # scraping: emails not available
                    logging.debug(u'www blame usernames {}'.format(pfile))
                    uns = self.gws.get_usernames_from_filename_blame(*sargs)
                self.committers[k] = uns
                with open(pfile, 'wb') as f:
                    pickle_dump((ghash, uns, emailmap), f)

            for email, github_id in emailmap.items():
                if email not in self.emails_cache:
                    self.emails_cache[email] = github_id

        # add scraped logins to the map
        for k in keys:
            for idx, x in enumerate(self.commits[k]):
                if x[u'email'] in [u'@']:
                    continue
                if x[u'email'] not in self.emails_cache:
                    self.emails_cache[x[u'email']] = None
                if x[u'login']:
                    self.emails_cache[x[u'email']] = x[u'login']
                    continue

                xhash = x[u'hash']
                for ck, cv in six.iteritems(self.committers[k]):
                    if xhash in cv:
                        self.emails_cache[x[u'email']] = ck
                        break

        # fill in what we can ...
        for k in keys:
            for idx, x in enumerate(self.commits[k]):
                if not x[u'login']:
                    if x[u'email'] in [u'@']:
                        continue
                    if self.emails_cache[x[u'email']]:
                        login = self.emails_cache[x[u'email']]
                        xhash = x[u'hash']
                        self.commits[k][idx][u'login'] = login
                        if login not in self.committers[k]:
                            self.committers[k][login] = []
                        if xhash not in self.committers[k][login]:
                            self.committers[k][login].append(xhash)
Example #19
    def load_update_fetch(self, property_name, obj=None, force=False):
        '''Fetch a property for an issue object'''

        # A pygithub issue object has methods such as ...
        #   - get_events()
        #   - get_comments()
        # Those methods return a list with no update() property,
        # so we can't take advantage of the caching scheme used
        # for the issue itself. Instead, this function calls
        # those methods by their given name and writes the data
        # to a pickle file with a timestamp for the fetch time.
        # Upon later loading of the pickle, the timestamp is
        # compared to the issue's updated_at timestamp and, if the
        # pickle data is behind, the fetch is repeated.

        edata = None
        events = []
        updated = None
        update = False
        write_cache = False

        pfile = os.path.join(self.full_cachedir, u'%s.pickle' % property_name)
        pdir = os.path.dirname(pfile)
        logging.debug(pfile)

        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            try:
                with open(pfile, 'rb') as f:
                    edata = pickle_load(f)
            except Exception as e:
                update = True
                write_cache = True

        # check the timestamp on the cache
        if edata:
            updated = edata[0]
            events = edata[1]
            if updated < self.instance.updated_at:
                update = True
                write_cache = True

        baseobj = None
        if obj:
            if obj == u'issue':
                baseobj = self.instance
            elif obj == u'pullrequest':
                baseobj = self.pullrequest
        else:
            if hasattr(self.instance, u'get_' + property_name):
                baseobj = self.instance
            else:
                if self.pullrequest:
                    if hasattr(self.pullrequest, u'get_' + property_name):
                        baseobj = self.pullrequest

        if not baseobj:
            logging.error(
                u'%s was not a property for the issue or the pullrequest' %
                property_name)
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb
                epdb.st()
            else:
                raise Exception(u'property error')

        # pull all events if timestamp is behind or no events cached
        if update or not events or force:
            write_cache = True
            updated = datetime.datetime.utcnow()

            if not hasattr(baseobj, u'get_' + property_name) \
                    and hasattr(baseobj, property_name):
                # !callable properties
                try:
                    methodToCall = getattr(baseobj, property_name)
                except Exception as e:
                    logging.error(e)
                    if C.DEFAULT_BREAKPOINTS:
                        logging.error(u'breakpoint!')
                        import epdb
                        epdb.st()
                    else:
                        raise Exception(to_text(e))
                events = methodToCall
            else:
                # callable properties
                try:
                    methodToCall = getattr(baseobj, u'get_' + property_name)
                except Exception as e:
                    logging.error(e)
                    if C.DEFAULT_BREAKPOINTS:
                        logging.error(u'breakpoint!')
                        import epdb
                        epdb.st()
                    else:
                        raise Exception(to_text(e))
                events = [x for x in methodToCall()]

        if C.DEFAULT_PICKLE_ISSUES:
            if write_cache or not os.path.isfile(pfile) or force:
                # need to dump the pickle back to disk
                edata = [updated, events]
                with open(pfile, 'wb') as f:
                    pickle_dump(edata, f)

        return events
Example #20
 def save_repo(self):
     with open(self.cachefile, 'wb') as f:
         pickle_dump(self.repo, f)
Example #21
    def get_pullrequest_status(self, force_fetch=False):
        def sort_unique_statuses(statuses):
            '''reduce redundant statuses to the final run for each id'''
            result = []
            groups = []
            thisgroup = []
            for idx, x in enumerate(statuses):
                if not thisgroup:
                    thisgroup.append(x)
                    if idx == len(statuses) - 1:
                        groups.append(thisgroup)
                    continue
                else:
                    if thisgroup[-1][u'target_url'] == x[u'target_url']:
                        thisgroup.append(x)
                    else:
                        groups.append(thisgroup)
                        thisgroup = []
                        thisgroup.append(x)

                    if idx == len(statuses) - 1:
                        groups.append(thisgroup)

            for group in groups:
                group.sort(key=operator.itemgetter(u'updated_at'))
                result.append(group[-1])

            return result

        fetched = False
        jdata = None
        pdata = None
        # pull out the status url from the raw data
        rd = self.pullrequest_raw_data
        surl = rd[u'statuses_url']

        pfile = os.path.join(self.cachedir, u'issues', to_text(self.number),
                             u'pr_status.pickle')
        pdir = os.path.dirname(pfile)
        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            logging.info(u'pullrequest_status load pfile')
            with open(pfile, 'rb') as f:
                pdata = pickle_load(f)

        if pdata:
            # is the data stale?
            if pdata[0] < self.pullrequest.updated_at or force_fetch:
                logging.info(u'fetching pr status: stale, previous from %s' %
                             pdata[0])
                jdata = self._fetch_api_url(surl)
                self.log_ci_status(jdata)
                fetched = True
            else:
                jdata = pdata[1]

        # missing?
        if not jdata:
            logging.info(u'fetching pr status: !data')
            jdata = self._fetch_api_url(surl)
            fetched = True

        if fetched or not os.path.isfile(pfile):
            logging.info(u'writing %s' % pfile)
            pdata = (self.pullrequest.updated_at, jdata)
            with open(pfile, 'wb') as f:
                pickle_dump(pdata, f)

        # remove intermediate duplicates
        #jdata = sort_unique_statuses(jdata)

        return jdata
Example #22
    def get_module_commits(self):
        keys = self.modules.keys()
        keys = sorted(keys)
        for k in keys:
            self.commits[k] = []
            cpath = os.path.join(self.gitrepo.checkoutdir, k)
            if not os.path.isfile(cpath):
                continue

            mtime = os.path.getmtime(cpath)
            refresh = False
            pfile = os.path.join(
                self.scraper_cache,
                k.replace(u'/', u'_') + u'.commits.pickle'
            )

            if not os.path.isfile(pfile):
                refresh = True
            else:
                pickle_kwargs = {'encoding': 'bytes'} if six.PY3 else {}
                print(pfile)
                with open(pfile, 'rb') as f:
                    pdata = pickle_load(f, **pickle_kwargs)
                if pdata[0] == mtime:
                    self.commits[k] = pdata[1]
                else:
                    refresh = True

            if refresh:
                logging.info(u'refresh commit cache for %s' % k)
                cmd = u'cd %s; git log --follow %s' % (self.gitrepo.checkoutdir, k)
                (rc, so, se) = run_command(cmd)
                for line in to_text(so).split(u'\n'):
                    if line.startswith(u'commit '):
                        commit = {
                            u'name': None,
                            u'email': None,
                            u'login': None,
                            u'hash': line.split()[-1],
                            u'date': None
                        }

                    # Author: Matt Clay <*****@*****.**>
                    if line.startswith(u'Author: '):
                        line = line.replace(u'Author: ', u'')
                        line = line.replace(u'<', u'')
                        line = line.replace(u'>', u'')
                        lparts = line.split()

                        if u'@' in lparts[-1]:
                            commit[u'email'] = lparts[-1]
                            commit[u'name'] = u' '.join(lparts[:-1])
                        else:
                            pass

                        if commit[u'email'] and \
                                u'noreply.github.com' in commit[u'email']:
                            commit[u'login'] = commit[u'email'].split(u'@')[0]

                    # Date:   Sat Jan 28 23:28:53 2017 -0800
                    if line.startswith(u'Date:'):
                        dstr = line.split(u':', 1)[1].strip()
                        dstr = u' '.join(dstr.split(u' ')[:-1])
                        ds = datetime.datetime.strptime(
                            to_text(dstr),
                            u'%a %b %d %H:%M:%S %Y'
                        )
                        commit[u'date'] = ds
                        self.commits[k].append(commit)

                with open(pfile, 'wb') as f:
                    pickle_dump((mtime, self.commits[k]), f)
Example #23
    def _get_module_blames(self):
        ''' Scrape the blame page for each module and store it '''

        keys = sorted(self.modules.keys())

        # scrape the data
        for k in keys:

            cpath = os.path.join(self.gitrepo.checkoutdir, k)
            if not os.path.isfile(cpath):
                self.committers[k] = {}
                continue

            ghash = self.last_commit_for_file(k)
            pfile = os.path.join(
                self.scraper_cache,
                k.replace(u'/', u'_') + u'.blame.pickle'
            )
            sargs = [u'ansible', u'ansible', u'devel', k]

            refresh = False
            if not os.path.isfile(pfile):
                refresh = True
            else:
                logging.debug(u'load {}'.format(pfile))
                with open(pfile, 'rb') as f:
                    pdata = pickle_load(f)
                if C.DEFAULT_BREAKPOINTS:
                    logging.error(u'breakpoint!')
                    import epdb; epdb.st()
                if pdata[0] == ghash:
                    self.committers[k] = pdata[1]
                    if len(pdata) == 3:
                        # use emailmap if available
                        emailmap = pdata[2]
                    else:
                        emailmap = {}
                else:
                    refresh = True

            if refresh:
                if self.gqlc:
                    logging.debug(u'graphql blame usernames {}'.format(pfile))
                    uns, emailmap = self.gqlc.get_usernames_from_filename_blame(*sargs)
                else:
                    emailmap = {}  # scraping: emails not available
                    logging.debug(u'www blame usernames {}'.format(pfile))
                    uns = self.gws.get_usernames_from_filename_blame(*sargs)
                self.committers[k] = uns
                with open(pfile, 'wb') as f:
                    pickle_dump((ghash, uns, emailmap), f)

            for email, github_id in emailmap.items():
                if email not in self.emails_cache:
                    self.emails_cache[email] = github_id

        # add scraped logins to the map
        for k in keys:
            for idx, x in enumerate(self.commits[k]):
                if x[u'email'] in [u'@']:
                    continue
                if x[u'email'] not in self.emails_cache:
                    self.emails_cache[x[u'email']] = None
                if x[u'login']:
                    self.emails_cache[x[u'email']] = x[u'login']
                    continue

                xhash = x[u'hash']
                for ck, cv in six.iteritems(self.committers[k]):
                    if xhash in cv:
                        self.emails_cache[x[u'email']] = ck
                        break

        # fill in what we can ...
        for k in keys:
            for idx, x in enumerate(self.commits[k]):
                if not x[u'login']:
                    if x[u'email'] in [u'@']:
                        continue
                    if self.emails_cache[x[u'email']]:
                        login = self.emails_cache[x[u'email']]
                        xhash = x[u'hash']
                        self.commits[k][idx][u'login'] = login
                        if login not in self.committers[k]:
                            self.committers[k][login] = []
                        if xhash not in self.committers[k][login]:
                            self.committers[k][login].append(xhash)
Example #24
    def load_update_fetch(self, property_name, obj=None):
        '''Fetch a property for an issue object'''

        # A pygithub issue object has methods such as ...
        #   - get_events()
        #   - get_comments()
        # Those methods return a list with no update() property,
        # so we can't take advantage of the caching scheme used
        # for the issue itself. Instead, this function calls
        # those methods by their given name and writes the data
        # to a pickle file with a timestamp for the fetch time.
        # Upon later loading of the pickle, the timestamp is
        # compared to the issue's updated_at timestamp and, if the
        # pickle data is behind, the fetch is repeated.

        edata = None
        events = []
        updated = None
        update = False
        write_cache = False

        pfile = os.path.join(
            self.cachedir,
            u'issues',
            to_text(self.instance.number),
            u'%s.pickle' % property_name
        )
        pdir = os.path.dirname(pfile)
        logging.debug(pfile)

        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            try:
                with open(pfile, 'rb') as f:
                    edata = pickle_load(f)
            except Exception as e:
                update = True
                write_cache = True

        # check the timestamp on the cache
        if edata:
            updated = edata[0]
            events = edata[1]
            if updated < self.instance.updated_at:
                update = True
                write_cache = True

        baseobj = None
        if obj:
            if obj == u'issue':
                baseobj = self.instance
            elif obj == u'pullrequest':
                baseobj = self.pullrequest
        else:
            if hasattr(self.instance, u'get_' + property_name):
                baseobj = self.instance
            else:
                if self.pullrequest:
                    if hasattr(self.pullrequest, u'get_' + property_name):
                        baseobj = self.pullrequest

        if not baseobj:
            logging.error(
                u'%s was not a property for the issue or the pullrequest'
                % property_name
            )
            if C.DEFAULT_BREAKPOINTS:
                logging.error(u'breakpoint!')
                import epdb; epdb.st()
            else:
                raise Exception(u'property error')

        # pull all events if timestamp is behind or no events cached
        if update or not events:
            write_cache = True
            updated = self.get_current_time()

            if not hasattr(baseobj, u'get_' + property_name) \
                    and hasattr(baseobj, property_name):
                # !callable properties
                try:
                    methodToCall = getattr(baseobj, property_name)
                except Exception as e:
                    logging.error(e)
                    if C.DEFAULT_BREAKPOINTS:
                        logging.error(u'breakpoint!')
                        import epdb; epdb.st()
                    else:
                        raise Exception(to_text(e))
                events = methodToCall
            else:
                # callable properties
                try:
                    methodToCall = getattr(baseobj, u'get_' + property_name)
                except Exception as e:
                    logging.error(e)
                    if C.DEFAULT_BREAKPOINTS:
                        logging.error(u'breakpoint!')
                        import epdb; epdb.st()
                    else:
                        raise Exception(to_text(e))
                events = [x for x in methodToCall()]

        if write_cache or not os.path.isfile(pfile):
            # need to dump the pickle back to disk
            edata = [updated, events]
            with open(pfile, 'wb') as f:
                pickle_dump(edata, f)

        return events
Example #25
    def get_pullrequest_status(self, force_fetch=False):

        def sort_unique_statuses(statuses):
            '''reduce redundant statuses to the final run for each id'''
            result = []
            groups = []
            thisgroup = []
            for idx, x in enumerate(statuses):
                if not thisgroup:
                    thisgroup.append(x)
                    if idx == len(statuses) - 1:
                        groups.append(thisgroup)
                    continue
                else:
                    if thisgroup[-1][u'target_url'] == x[u'target_url']:
                        thisgroup.append(x)
                    else:
                        groups.append(thisgroup)
                        thisgroup = []
                        thisgroup.append(x)

                    if idx == len(statuses) - 1:
                        groups.append(thisgroup)

            for group in groups:
                group.sort(key=operator.itemgetter(u'updated_at'))
                result.append(group[-1])

            return result

        fetched = False
        jdata = None
        pdata = None
        # pull out the status url from the raw data
        rd = self.pullrequest_raw_data
        surl = rd[u'statuses_url']

        pfile = os.path.join(self.full_cachedir, u'pr_status.pickle')
        pdir = os.path.dirname(pfile)
        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            logging.info(u'pullrequest_status load pfile')
            with open(pfile, 'rb') as f:
                pdata = pickle_load(f)

        if pdata:
            # is the data stale?
            if pdata[0] < self.pullrequest.updated_at or force_fetch:
                logging.info(u'fetching pr status: stale, previous from %s' % pdata[0])
                jdata = self.github.get_request(surl)

                if isinstance(jdata, dict):
                    # https://github.com/ansible/ansibullbot/issues/959
                    logging.error(u'Got the following error while fetching PR status: %s', jdata.get(u'message'))
                    logging.error(jdata)
                    return []

                self.log_ci_status(jdata)
                fetched = True
            else:
                jdata = pdata[1]

        # missing?
        if not jdata:
            logging.info(u'fetching pr status: !data')
            jdata = self.github.get_request(surl)
            # FIXME? should we self.log_ci_status(jdata) here too?
            fetched = True

        if fetched or not os.path.isfile(pfile):
            logging.info(u'writing %s' % pfile)
            pdata = (self.pullrequest.updated_at, jdata)
            with open(pfile, 'wb') as f:
                pickle_dump(pdata, f)

        # remove intermediate duplicates
        #jdata = sort_unique_statuses(jdata)

        return jdata
Example #26
    def get_module_commits(self):
        keys = self.modules.keys()
        keys = sorted(keys)
        for k in keys:
            self.commits[k] = []
            cpath = os.path.join(self.gitrepo.checkoutdir, k)
            if not os.path.isfile(cpath):
                continue

            mtime = os.path.getmtime(cpath)
            refresh = False
            pfile = os.path.join(self.scraper_cache,
                                 k.replace(u'/', u'_') + u'.commits.pickle')

            if not os.path.isfile(pfile):
                refresh = True
            else:
                pickle_kwargs = {'encoding': 'bytes'} if six.PY3 else {}
                print(pfile)
                with open(pfile, 'rb') as f:
                    pdata = pickle_load(f, **pickle_kwargs)
                if pdata[0] == mtime:
                    self.commits[k] = pdata[1]
                else:
                    refresh = True

            if refresh:
                logging.info(u'refresh commit cache for %s' % k)
                cmd = u'cd %s; git log --follow %s' % (
                    self.gitrepo.checkoutdir, k)
                (rc, so, se) = run_command(cmd)
                for line in to_text(so).split(u'\n'):
                    if line.startswith(u'commit '):
                        commit = {
                            u'name': None,
                            u'email': None,
                            u'login': None,
                            u'hash': line.split()[-1],
                            u'date': None
                        }

                    # Author: Matt Clay <*****@*****.**>
                    if line.startswith(u'Author: '):
                        line = line.replace(u'Author: ', u'')
                        line = line.replace(u'<', u'')
                        line = line.replace(u'>', u'')
                        lparts = line.split()

                        if u'@' in lparts[-1]:
                            commit[u'email'] = lparts[-1]
                            commit[u'name'] = u' '.join(lparts[:-1])
                        else:
                            pass

                        if commit[u'email'] and \
                                u'noreply.github.com' in commit[u'email']:
                            commit[u'login'] = commit[u'email'].split(u'@')[0]

                    # Date:   Sat Jan 28 23:28:53 2017 -0800
                    if line.startswith(u'Date:'):
                        dstr = line.split(u':', 1)[1].strip()
                        dstr = u' '.join(dstr.split(u' ')[:-1])
                        ds = datetime.datetime.strptime(
                            to_text(dstr), u'%a %b %d %H:%M:%S %Y')
                        commit[u'date'] = ds
                        self.commits[k].append(commit)

                with open(pfile, 'wb') as f:
                    pickle_dump((mtime, self.commits[k]), f)
Example #27
    def get_pullrequest_status(self, force_fetch=False):

        def sort_unique_statuses(statuses):
            '''reduce redundant statuses to the final run for each id'''
            result = []
            groups = []
            thisgroup = []
            for idx, x in enumerate(statuses):
                if not thisgroup:
                    thisgroup.append(x)
                    if idx == len(statuses) - 1:
                        groups.append(thisgroup)
                    continue
                else:
                    if thisgroup[-1][u'target_url'] == x[u'target_url']:
                        thisgroup.append(x)
                    else:
                        groups.append(thisgroup)
                        thisgroup = []
                        thisgroup.append(x)

                    if idx == len(statuses) - 1:
                        groups.append(thisgroup)

            for group in groups:
                group.sort(key=operator.itemgetter(u'updated_at'))
                result.append(group[-1])

            return result

        fetched = False
        jdata = None
        pdata = None
        # pull out the status url from the raw data
        rd = self.pullrequest_raw_data
        surl = rd[u'statuses_url']

        pfile = os.path.join(
            self.cachedir,
            u'issues',
            to_text(self.number),
            u'pr_status.pickle'
        )
        pdir = os.path.dirname(pfile)
        if not os.path.isdir(pdir):
            os.makedirs(pdir)

        if os.path.isfile(pfile):
            logging.info(u'pullrequest_status load pfile')
            with open(pfile, 'rb') as f:
                pdata = pickle_load(f)

        if pdata:
            # is the data stale?
            if pdata[0] < self.pullrequest.updated_at or force_fetch:
                logging.info(u'fetching pr status: stale, previous from %s' % pdata[0])
                jdata = self._fetch_api_url(surl)
                self.log_ci_status(jdata)
                fetched = True
            else:
                jdata = pdata[1]

        # missing?
        if not jdata:
            logging.info(u'fetching pr status: !data')
            jdata = self._fetch_api_url(surl)
            fetched = True

        if fetched or not os.path.isfile(pfile):
            logging.info(u'writing %s' % pfile)
            pdata = (self.pullrequest.updated_at, jdata)
            with open(pfile, 'wb') as f:
                pickle_dump(pdata, f)

        # remove intermediate duplicates
        #jdata = sort_unique_statuses(jdata)

        return jdata