Example #1
    def run(self,
            source_url,
            repository_name,
            changeset=None,
            last_push_id=None):
        print(repository_name)
        if not last_push_id:
            # Get the last push ID seen from the cache; this reduces the
            # number of pushes processed on each run.
            last_push_id = cache.get(
                "{0}:last_push_id".format(repository_name))

        if not changeset and last_push_id:
            startid_url = "{}&startID={}".format(source_url, last_push_id)
            logger.info(
                "Extracted last push for '%s', '%s', from cache, "
                "attempting to get changes only from that point at: %s" %
                (repository_name, last_push_id, startid_url))
            # Use the cached ``last_push_id`` value (saved from the last time
            # this API was called) for this repo.  Use that value as the
            # ``startID`` to get all new pushes from that point forward.
            extracted_content = self.extract(startid_url)

            if extracted_content['lastpushid'] < last_push_id:
                # Push IDs from Mercurial are incremental.  If we cached a value
                # from one call to this API, and a subsequent call told us that
                # the ``lastpushid`` is LOWER than the one we have cached, then
                # the Mercurial IDs were reset.
                # In this circumstance, we can't rely on the cached id, so must
                # throw it out and get the latest 10 pushes.
                logger.warning(
                    ("Got a ``lastpushid`` value of {} lower than "
                     "the cached value of {} due to Mercurial repo reset.  "
                     "Getting latest changes for '{}' instead").format(
                         extracted_content['lastpushid'], last_push_id,
                         repository_name))
                cache.delete("{0}:last_push_id".format(repository_name))
                extracted_content = self.extract(source_url)
        else:
            if changeset:
                logger.info("Getting all pushes for '%s' corresponding to "
                            "changeset '%s'" % (repository_name, changeset))
                extracted_content = self.extract(source_url + "&changeset=" +
                                                 changeset)
            else:
                logger.warning("Unable to get last push from cache for '%s', "
                               "getting all pushes" % repository_name)
                extracted_content = self.extract(source_url)

        # ``pushes`` could be empty if there are no new ones since we last
        # fetched
        pushes = extracted_content['pushes']

        if not pushes:
            return None

        # Push IDs are numeric strings; the highest one is the most recent
        # push, and its final changeset is the top revision.
        last_push_id = max(map(lambda x: int(x), pushes.keys()))
        last_push = pushes[str(last_push_id)]
        top_revision = last_push["changesets"][-1]["node"]
        # TODO: further remove the use of client types here
        transformed = self.transform(pushes, repository_name)

        errors = []
        repository = Repository.objects.get(name=repository_name)
        # Validate and store one chunk at a time so a single bad
        # collection doesn't abort ingestion of the rest.
        for collection in transformed[repository_name].get_chunks(
                chunk_size=1):
            try:
                collection.validate()
                store_result_set_data(repository,
                                      collection.get_collection_data())
            except Exception:
                newrelic.agent.record_exception()
                errors.append({
                    "project": repository,
                    "collection": "result_set",
                    "message": traceback.format_exc()
                })

        if errors:
            raise CollectionNotStoredException(errors)

        if not changeset:
            # only cache the last push if we're not fetching a specific
            # changeset
            cache.set("{0}:last_push_id".format(repository_name), last_push_id)

        return top_revision
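
The key pattern in this version is the incremental-fetch contract with
Mercurial's json-pushes API: cache the last push ID seen, pass it back as
``startID`` on the next call, and fall back to a full fetch whenever the
server reports a ``lastpushid`` lower than the cached one (a repo reset).
Below is a minimal, self-contained sketch of that pattern; the
``fetch_json`` helper and the in-memory dict stand in for ``self.extract``
and Django's ``cache`` and are assumptions, not the project's actual API.

import requests

_cache = {}  # stand-in for Django's cache; an assumption for this sketch

def fetch_json(url):
    # Hypothetical helper replacing self.extract(); assumes the
    # json-pushes response shape: {"lastpushid": int, "pushes": {...}}.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return response.json()

def fetch_new_pushes(source_url, repository_name):
    cache_key = "{}:last_push_id".format(repository_name)
    last_push_id = _cache.get(cache_key)

    if last_push_id:
        # Incremental fetch: only pushes newer than the cached ID.
        content = fetch_json("{}&startID={}".format(source_url, last_push_id))
        if content["lastpushid"] < last_push_id:
            # Push IDs went backwards, so the repo was reset; the cached
            # ID is meaningless. Discard it and fetch from scratch.
            _cache.pop(cache_key, None)
            content = fetch_json(source_url)
    else:
        content = fetch_json(source_url)

    pushes = content["pushes"]
    if pushes:
        _cache[cache_key] = max(map(int, pushes.keys()))
    return pushes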
Example #2
    def run(self,
            source_url,
            repository_name,
            changeset=None,
            last_push_id=None):
        cache_key = '{}:last_push_id'.format(repository_name)
        if not last_push_id:
            # Get the last push ID seen from the cache; this reduces the
            # number of pushes processed on each run.
            last_push_id = cache.get(cache_key)

        if not changeset and last_push_id:
            startid_url = "{}&startID={}".format(source_url, last_push_id)
            logger.info(
                "Extracted last push for '%s', '%s', from cache, "
                "attempting to get changes only from that point at: %s",
                repository_name, last_push_id, startid_url)
            # Use the cached ``last_push_id`` value (saved from the last time
            # this API was called) for this repo.  Use that value as the
            # ``startID`` to get all new pushes from that point forward.
            extracted_content = self.extract(startid_url)

            if extracted_content['lastpushid'] < last_push_id:
                # Push IDs from Mercurial are incremental.  If we cached a value
                # from one call to this API, and a subsequent call told us that
                # the ``lastpushid`` is LOWER than the one we have cached, then
                # the Mercurial IDs were reset.
                # In this circumstance, we can't rely on the cached id, so must
                # throw it out and get the latest 10 pushes.
                logger.warning(
                    "Got a ``lastpushid`` value of %s lower than the cached value of %s "
                    "due to Mercurial repo reset. Getting latest changes for '%s' instead",
                    extracted_content['lastpushid'], last_push_id,
                    repository_name)
                cache.delete(cache_key)
                extracted_content = self.extract(source_url)
        else:
            if changeset:
                logger.info(
                    "Getting all pushes for '%s' corresponding to "
                    "changeset '%s'", repository_name, changeset)
                extracted_content = self.extract(source_url + "&changeset=" +
                                                 changeset)
            else:
                logger.warning(
                    "Unable to get last push from cache for '%s', "
                    "getting all pushes", repository_name)
                extracted_content = self.extract(source_url)

        pushes = extracted_content['pushes']

        # `pushes` could be empty if there are no new ones since we last fetched
        if not pushes:
            return None

        last_push_id = max(map(int, pushes.keys()))
        last_push = pushes[str(last_push_id)]
        top_revision = last_push["changesets"][-1]["node"]

        errors = []
        repository = Repository.objects.get(name=repository_name)

        for push in pushes.values():
            if not push['changesets']:
                # A push without commits means it was marked as obsolete (see bug 1286426).
                # Without them it's not possible to calculate the push revision required for ingestion.
                continue

            try:
                store_push(repository, self.transform_push(push))
            except Exception:
                newrelic.agent.record_exception()
                errors.append({
                    "project": repository,
                    "collection": "result_set",
                    "message": traceback.format_exc()
                })

        if errors:
            raise CollectionNotStoredException(errors)

        if not changeset:
            # Only cache the last push if we're not fetching a specific
            # changeset; the timeout lets the cached ID expire on its own.
            cache.set(cache_key, last_push_id, ONE_WEEK_IN_SECONDS)

        return top_revision
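
Both versions read the extracted content the same way, so it helps to see
the response shape they assume. The fixture below is a hand-written
illustration of a json-pushes-style payload (values invented), showing why
example 2 skips the obsolete push and how ``top_revision`` is derived.

# Illustrative fixture mirroring only the fields the code reads.
extracted_content = {
    "lastpushid": 102,
    "pushes": {
        "101": {
            "changesets": [],  # obsolete push: no commits, so it is skipped
        },
        "102": {
            "changesets": [
                {"node": "aaaa1111"},
                {"node": "bbbb2222"},  # last changeset -> top_revision
            ],
        },
    },
}

pushes = extracted_content["pushes"]
last_push_id = max(map(int, pushes.keys()))  # -> 102
top_revision = pushes[str(last_push_id)]["changesets"][-1]["node"]
assert top_revision == "bbbb2222"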