示例#1
0
 def start(self):
     """
     Run the worker's main loop until ``self.terminate`` is set, then clean up.

     Startup loads and syncs state, starts the image manager and, unless
     ``self.shared_file_system`` is set, the dependency manager as well.

     Every pass processes runs, saves state, checks in, checks for
     termination and saves state again. If ``check_idle_stop()`` or
     ``check_num_runs_stop()`` reports true the loop is asked to stop;
     otherwise the worker sleeps ``self.checkin_frequency_seconds``.

     An ``Exception`` raised during a pass marks the last check-in as
     unsuccessful, is sent to Sentry when ``using_sentry()`` is true and is
     printed. The worker then either terminates after a 5-minute sleep
     (``self.exit_on_exception``) or backs off for up to one hour.

     Nothing is returned; ``self.cleanup()`` is called once the loop ends.
     """
     self.load_state()
     self.sync_state()
     self.image_manager.start()
     if not self.shared_file_system:
         self.dependency_manager.start()

     while not self.terminate:
         try:
             self.process_runs()
             self.save_state()
             self.checkin()
             self.check_termination()
             self.save_state()
             should_stop = self.check_idle_stop() or self.check_num_runs_stop()
             if should_stop:
                 self.terminate = True
                 continue
             time.sleep(self.checkin_frequency_seconds)
         except Exception:
             self.last_checkin_successful = False
             if using_sentry():
                 capture_exception()
             traceback.print_exc()

             if self.exit_on_exception:
                 logger.warning(
                     'Encountered exception, terminating the worker after sleeping for 5 minutes...'
                 )
                 self.terminate = True
                 # Five-minute pause before the loop condition ends the worker.
                 time.sleep(5 * 60)
                 continue

             # Long back-off so the worker does not keep failing in a tight
             # loop. It is taken in 5-second slices so a termination request
             # (say, from a SIGTERM signal) is noticed promptly.
             logger.warning('Sleeping for 1 hour due to exception...please help me!')
             slices_left = 12 * 60
             while slices_left > 0:
                 slices_left -= 1
                 # This wait deliberately does not go through another pass of
                 # the outer loop: an exception brought us here, so as little
                 # code as possible is run to avoid triggering it again.
                 if self.terminate_and_restage:
                     # With terminate_and_restage set, check_termination()
                     # restages bundles. Its failure is only printed so that
                     # termination and cleanup still go ahead.
                     try:
                         self.check_termination()
                     except Exception:
                         traceback.print_exc()
                     self.terminate = True
                 if self.terminate:
                     break
                 time.sleep(5)

     self.cleanup()
        def image_availability_state(image_spec, success_message,
                                     failure_message):
            """
            Try to get the image specified by image_spec from the host machine.

            On success the image is tagged ``<self.CACHE_TAG>:<current time>``
            and any other ``self.CACHE_TAG`` tags listed on the image are
            removed.

            :param image_spec: image reference passed to ``self._docker.images.get``.
            :param success_message: message stored in the READY state.
            :param failure_message: ``%``-format template; it is formatted with
                the caught exception to build the FAILED state's message.
            :return: ImageAvailabilityState with stage READY (digest is the
                first repo digest, or None when there is none) or stage FAILED
                (digest None) if anything raised.
            """
            try:
                image = self._docker.images.get(image_spec)
                digests = image.attrs.get('RepoDigests', [image_spec])
                # Truthiness covers both an empty list and a null RepoDigests
                # entry; the image is still usable, it just has no digest.
                digest = digests[0] if digests else None
                new_timestamp = str(time.time())
                image.tag(self.CACHE_TAG, tag=new_timestamp)
                for tag in image.tags:
                    # Split on the LAST colon only: a repository name may itself
                    # contain a colon (registry "host:port/name:tag"), and a
                    # plain two-way split would raise and wrongly report an
                    # available image as FAILED.
                    tag_label, _, timestamp = tag.rpartition(":")
                    # remove any other timestamp but not the current one
                    if tag_label == self.CACHE_TAG and timestamp != new_timestamp:
                        try:
                            self._docker.images.remove(tag)
                        except docker.errors.NotFound as err:
                            # It's possible that we get a 404 not found error here when removing the image,
                            # since another worker on the same system has already done so. We just
                            # ignore this 404, since any extraneous tags will be removed during the next iteration.
                            logger.warning(
                                "Attempted to remove image %s from cache, but image was not found: %s",
                                tag,
                                err,
                            )

                return ImageAvailabilityState(digest=digest,
                                              stage=DependencyStage.READY,
                                              message=success_message)
            except Exception as ex:
                # Any failure (image missing, Docker error, ...) is reported to
                # Sentry when enabled and surfaced as a FAILED state instead of
                # being raised to the caller.
                if using_sentry():
                    capture_exception()
                return ImageAvailabilityState(digest=None,
                                              stage=DependencyStage.FAILED,
                                              message=failure_message % ex)