Exemplo n.º 1
0
    def __init__(self,
                 amqp_url='amqp://*****:*****@127.0.0.1:5672',
                 exchange=None,
                 queue=None,
                 route_key=None,
                 software_url="http://127.0.0.1:3001/api/software",
                 images_url="http://127.0.0.1:3000/api/images",
                 hub_url="https://hub.docker.com/",
                 rmi=True):
        """Create the logger and the service clients used by the scanner.

        Args:
            amqp_url: URL handed to the RabbitMQ consumer.
            exchange: exchange handed to the RabbitMQ consumer.
            queue: queue handed to the RabbitMQ consumer.
            route_key: routing key handed to the RabbitMQ consumer.
            software_url: endpoint handed to the Software service client.
            images_url: endpoint handed to the Images service client.
            hub_url: accepted but not used by this constructor.
            rmi: stored as ``self.rmi``; flags that an image should be
                removed after it has been scanned.
        """
        self.rmi = rmi

        # The logger is named after the enclosing class.
        class_name = __class__.__name__
        self.logger = logging.getLogger(class_name)
        self.logger.info(class_name + " logger  initialized")

        # Software service: tells which software to look for in the images.
        self.client_software = ClientSoftware(api_url=software_url)

        # Docker daemon of the local host, reached through its unix socket.
        docker_socket = 'unix://var/run/docker.sock'
        self.client_daemon = docker.DockerClient(base_url=docker_socket)

        # RabbitMQ consumer: ``self.on_message`` is registered as the
        # callback for incoming messages (the names of the images to scan).
        consumer_options = dict(amqp_url=amqp_url,
                                exchange=exchange,
                                queue=queue,
                                route_key=route_key,
                                on_msg_callback=self.on_message)
        self.consumer = ConsumerRabbit(**consumer_options)

        # Images service: used to add and update image descriptions.
        self.client_images = ClientImages(images_url=images_url)
Exemplo n.º 2
0
    def __init__(self,
                 images_url="http://127.0.0.1:3000/api/images",
                 hub_url="https://hub.docker.com/",
                 amqp_url='amqp://*****:*****@127.0.0.1:5672',
                 exchange="dofinder",
                 queue="images",
                 route_key="images.scan",
                 path_file_logging=None):
        """Store the RabbitMQ settings and build loggers and service clients.

        Args:
            images_url: endpoint handed to the Images service client.
            hub_url: endpoint handed to the Docker Hub client.
            amqp_url: RabbitMQ URL, stored as ``self.url_amqp``.
            exchange: exchange name, stored as ``self._exchange``.
            queue: accepted but not used by this constructor.
            route_key: routing key, stored as ``self._route_key``.
            path_file_logging: when truthy, path of a time-rotated log file
                that receives one statistics line per check; a header line
                is written to it right away.
        """
        # Settings needed later to publish image names to RabbitMQ.
        self.url_amqp = amqp_url
        self._exchange = exchange
        self._route_key = route_key

        # Plain logger named after the enclosing class.
        class_name = __class__.__name__
        self.logger = logging.getLogger(class_name)
        self.logger.info(class_name + " logger  initialized")

        # Optional statistics logger backed by a rotating file.
        self.path_file_logging = path_file_logging
        self.file_logger = None
        if path_file_logging:
            stats_logger = logging.getLogger(class_name + "-rotated")
            self.file_logger = stats_logger
            stats_logger.setLevel(logging.DEBUG)

            # Rotate every 24 hours and keep 10 rotated files.
            rotation_hours = 24
            kept_backups = 10
            self.logger.info("LOGGING PATH: " + path_file_logging +
                             " every hour=" + str(rotation_hours) +
                             " with backupcount=" + str(kept_backups))

            rotating_handler = TimedRotatingFileHandler(
                path_file_logging,
                when="h",
                interval=rotation_hours,
                backupCount=kept_backups)
            rotating_handler.setLevel(logging.INFO)
            rotating_handler.setFormatter(
                logging.Formatter('%(asctime)s %(message)s'))
            stats_logger.addHandler(rotating_handler)

            # Header naming the colon-separated columns of the later lines.
            stats_logger.info("hubtot:dftot:dfremoved:dfpending:dfuptodate")

        # Images service: used to add and update image descriptions.
        self.client_images = ClientImages(images_url=images_url)

        # Docker Hub client.
        self.client_hub = ClientHub(docker_hub_endpoint=hub_url)
Exemplo n.º 3
0
    def __init__(self,
                 exchange="dofinder",
                 queue="images",
                 route_key="images.scan",
                 amqp_url='amqp://*****:*****@127.0.0.1:5672',
                 images_url="http://127.0.0.1:3000/api/images",
                 hub_url="https://hub.docker.com",
                 path_last_url="/data/crawler/lasturl.txt",
                 policy="none",
                 min_stars=0,
                 min_pulls=0,
                 only_automated=False,
                 only_official=False):
        """Build the publisher and clients and record the crawl filters.

        Args:
            exchange: exchange handed to the RabbitMQ publisher.
            queue: queue handed to the RabbitMQ publisher.
            route_key: routing key handed to the RabbitMQ publisher.
            amqp_url: URL handed to the RabbitMQ publisher.
            images_url: endpoint handed to the Images service client.
            hub_url: endpoint handed to the Docker Hub client.
            path_last_url: handed to the Docker Hub client as
                ``path_last_url``.
            policy: one of ``"stars_first"``, ``"pulls_first"`` or
                ``"none"``; selects the value stored in ``self.ordering``.
            min_stars: stored as ``self.min_stars``.
            min_pulls: stored as ``self.min_pulls``.
            only_automated: stored as ``self.only_automated``.
            only_official: stored as ``self.only_official``.

        Raises:
            KeyError: if ``policy`` is not one of the three known values.
        """
        class_name = __class__.__name__
        self.logger = logging.getLogger(class_name)
        self.logger.info(class_name + " logger  initialized")

        # Publisher that pushes the crawled images to the RabbitMQ server.
        publisher_options = dict(exchange=exchange,
                                 queue=queue,
                                 route_key=route_key)
        self.publisher = PublisherRabbit(amqp_url, **publisher_options)
        self.logger.info("RabbitMQ : exchange=" + exchange + ", queue=" +
                         queue + " route key=" + route_key)

        # Docker Hub client.
        self.client_hub = ClientHub(docker_hub_endpoint=hub_url,
                                    path_last_url=path_last_url)

        # Images service client: a NEW image is sent to the queue,
        # any other image is discarded.
        self.client_images = ClientImages(images_url=images_url)

        # Translate the policy name into the ordering value kept on the
        # instance; an unknown policy fails here with KeyError.
        ordering_by_policy = {
            "stars_first": "-star_count",
            "pulls_first": "-pull_count",
            "none": None
        }
        self.ordering = ordering_by_policy[policy]
        self.policy = policy

        self.min_stars = min_stars
        self.min_pulls = min_pulls
        self.only_automated = only_automated
        self.only_official = only_official

        summary = "Crawler: ordering={}, policy={}, min_stars={}, min_pulls={}, only_official={}, only_automated={}"
        self.logger.info(
            summary.format(self.ordering, self.policy, min_stars, min_pulls,
                           only_official, only_automated))
Exemplo n.º 4
0
class Checker:
    def __init__(self,
                 images_url="http://127.0.0.1:3000/api/images",
                 hub_url="https://hub.docker.com/",
                 amqp_url='amqp://*****:*****@127.0.0.1:5672',
                 exchange="dofinder",
                 queue="images",
                 route_key="images.scan",
                 path_file_logging=None):
        """Set up the checker: RabbitMQ settings, loggers, service clients.

        Args:
            images_url: endpoint handed to the Images service client.
            hub_url: endpoint handed to the Docker Hub client.
            amqp_url: RabbitMQ URL used by ``send_to_rabbitmq``.
            exchange: exchange used by ``send_to_rabbitmq``.
            queue: accepted but not used by this constructor.
            route_key: routing key used by ``send_to_rabbitmq``.
            path_file_logging: when truthy, path of a time-rotated file in
                which ``check_images`` records one statistics line per run;
                a header line is written to it right away.
        """
        # Connection settings read back by send_to_rabbitmq().
        self.url_amqp = amqp_url
        self._exchange = exchange
        self._route_key = route_key

        # Plain logger named after the class.
        own_name = __class__.__name__
        self.logger = logging.getLogger(own_name)
        self.logger.info(own_name + " logger  initialized")

        # Optional statistics logger writing to a rotating file.
        self.path_file_logging = path_file_logging
        self.file_logger = None
        if path_file_logging:
            stats_logger = logging.getLogger(own_name + "-rotated")
            self.file_logger = stats_logger
            stats_logger.setLevel(logging.DEBUG)

            # Rotate every 24 hours and keep 10 rotated files.
            hours_between_rotations = 24
            files_to_keep = 10
            self.logger.info("LOGGING PATH: " + path_file_logging +
                             " every hour=" + str(hours_between_rotations) +
                             " with backupcount=" + str(files_to_keep))

            file_handler = TimedRotatingFileHandler(
                path_file_logging,
                when="h",
                interval=hours_between_rotations,
                backupCount=files_to_keep)
            file_handler.setLevel(logging.INFO)
            file_handler.setFormatter(
                logging.Formatter('%(asctime)s %(message)s'))
            stats_logger.addHandler(file_handler)

            # Column header matching the lines check_images() appends.
            stats_logger.info("hubtot:dftot:dfremoved:dfpending:dfuptodate")

        # Images service: used to read, update and delete image records.
        self.client_images = ClientImages(images_url=images_url)

        # Docker Hub client.
        self.client_hub = ClientHub(docker_hub_endpoint=hub_url)

    def send_to_rabbitmq(self, msg):
        """Publish ``msg`` to the configured exchange on a fresh connection.

        A blocking connection is opened for every call and is always closed
        again, even when opening the channel or publishing fails.

        Args:
            msg: message body (a string; it is also concatenated into a
                log line).

        Raises:
            Whatever pika raises while connecting or publishing; the
            connection is closed before the exception propagates.
        """
        connection = pika.BlockingConnection(pika.URLParameters(self.url_amqp))
        try:
            # NOTE(review): this logs the full AMQP URL, which may embed
            # credentials -- consider masking it.
            self.logger.info("connected to " + self.url_amqp)
            channel = connection.channel()
            self.logger.info(self._route_key)
            channel.basic_publish(exchange=self._exchange,
                                  routing_key=self._route_key,
                                  body=msg)
            self.logger.info(msg + " sent to " + self._exchange)
        finally:
            # Release the connection on the error path as well.
            connection.close()

    def check_images(self):
        """
        scan the images.
        """
        checked = {}
        tot_dockerfinder_images = 0
        removed = 0
        pending = 0
        uptodate = 0

        json_res = self.client_images.get_images()
        tot_dockerfinder_images = json_res['count']

        self.logger.info(
            str(tot_dockerfinder_images) +
            " images present into local database")
        tot_hub_images = self.client_hub.count_all_images()
        self.logger.info(
            str(tot_hub_images) + ": images present into Docker Hub")
        images = json_res['images']
        for image in images:
            name = image['name']
            #splitname = image['name'].split(":")
            repo = image['repo_name']  # splitname[0]
            tag = image['tag']  # splitname[1]
            image_id = image['_id']
            if self.client_hub.is_alive_in_hub(repo, tag=tag):
                self.logger.debug("[" + name + "] status:" + image['status'])
                if image['status'] == "updated":
                    # str(datetime.datetime.min)
                    if self.client_images.must_scanned(
                            name,
                            image['last_updated'] if image['last_updated'] else
                            "2000-01-01T00:00:00.000Z"):
                        self.logger.debug("[" + name +
                                          "] must be scanned again.")
                        self.client_images.update_status(
                            image_id, "pending")  # Set status to Pending
                        self.logger.info("[" + name +
                                         "] from UPDATED to PENDING status.")
                        self.send_to_rabbitmq(
                            json.dumps({
                                "name": name,
                                "repo_name": repo,
                                "tag": tag
                            }))
                        self.logger.info("[" + name + "] requeud into queue.")
                        # checked['pending'].append(name)
                        pending += 1
                    else:
                        self.logger.info("[" + name +
                                         "] remains UPDATED status.")
                        uptodate += 1
                if image['status'] == "pending":
                    self.logger.info("[" + name + "] remains PENDING status")
                    pending += 1
            else:
                # the image is removed from the database if it is not present
                # into Docker Hub
                self.logger.info(
                    "[" + name +
                    "] deleted from local db because it is not alive in DockerHub"
                )
                self.client_images.delete_image(image_id)
                removed += 1
        if self.path_file_logging:
            self.file_logger.info(
                str(tot_hub_images) + ":" + str(tot_dockerfinder_images) +
                ":" + str(removed) + ":" + str(pending) + ":" + str(uptodate))
        assert tot_dockerfinder_images == (pending + uptodate + removed)
        self.logger.info("Removed=" + str(removed) + "; pending=" +
                         str(pending) + " up-to-date=" + str(uptodate))

    def verify_images(self):
        """
        Scan all the images in the local databse and fix the problems
           1) having the ".go" or "." version of a software.
           2) is_private = null,
           3) is_automated = nul
        by updating the boolean value from Doker Hub.

        """

        json_res = self.client_images.get_images()
        tot_dockerfinder_images = json_res['count']
        self.logger.info(
            str(tot_dockerfinder_images) +
            " images present into local database")
        images = json_res['images']
        updated = 0
        for image in images:
            name = image['name']
            splitname = image['name'].split(":")
            repo = splitname[0]
            tag = splitname[1]
            json_response = self.client_hub.get_json_repo(repo)
            if json_response:
                if "is_automated" in json_response:
                    image['is_automated'] = json_response['is_automated']

                if "is_private" in json_response:
                    image['is_private'] = json_response['is_private']
                softwares = image['softwares']
                self.logger.info("before: {0}".format(softwares))
                # [0-9]+[.][0-9]*[.0-9]

                softwares = [sw for sw in softwares if sw['ver'] != '.']
                softwares = [sw for sw in softwares if sw['ver'] != ".go"]
                # for sw in softwares:
                #    if ".go" in sw['ver'] or sw['ver'] == ".":
                #        self.logger.info("removing {0}:{1}".format(sw['software'], sw['ver']))
                #        softwares.remove(sw)
                self.logger.info("after: {0}".format(softwares))
                image['softwares'] = softwares

            # PUT the new image description of the image
            self.client_images.put_image(image)
            updated += 1
            self.logger.info(
                "UPDATED [" + name +
                "]. {0}/{1}".format(updated, tot_dockerfinder_images))

    def run(self, interval_next_check):
        self.logger.info("Starting the checker module...")
        while True:
            try:
                self.check_images()
                time.sleep(interval_next_check)
            except Exception as e:
                self.logger.error(str(e))
                self.logger.error("Waiting 5s and restarting.")
                time.sleep(5)
Exemplo n.º 5
0
class Scanner:
    def __init__(self,
                 amqp_url='amqp://*****:*****@127.0.0.1:5672',
                 exchange=None,
                 queue=None,
                 route_key=None,
                 software_url="http://127.0.0.1:3001/api/software",
                 images_url="http://127.0.0.1:3000/api/images",
                 hub_url="https://hub.docker.com/",
                 rmi=True):
        """Create the scanner's logger, Docker client and service clients.

        Args:
            amqp_url: URL handed to the RabbitMQ consumer.
            exchange: exchange handed to the RabbitMQ consumer.
            queue: queue handed to the RabbitMQ consumer.
            route_key: routing key handed to the RabbitMQ consumer.
            software_url: endpoint handed to the Software service client.
            images_url: endpoint handed to the Images service client.
            hub_url: accepted but unused (no Docker Hub client is created).
            rmi: when true, ``scan`` removes the pulled image once the
                scan has finished.
        """
        self.rmi = rmi

        # Logger named after the class.
        own_name = __class__.__name__
        self.logger = logging.getLogger(own_name)
        self.logger.info(own_name + " logger  initialized")

        # Software service: returns the software to search in the images.
        self.client_software = ClientSoftware(api_url=software_url)

        # Docker daemon of the local host, reached through its unix socket.
        socket_url = 'unix://var/run/docker.sock'
        self.client_daemon = docker.DockerClient(base_url=socket_url)

        # RabbitMQ consumer; on_message() is registered as the callback
        # for every received message (an image to scan).
        consumer_settings = dict(amqp_url=amqp_url,
                                 exchange=exchange,
                                 queue=queue,
                                 route_key=route_key,
                                 on_msg_callback=self.on_message)
        self.consumer = ConsumerRabbit(**consumer_settings)

        # Images service: used to add and update image descriptions.
        self.client_images = ClientImages(images_url=images_url)

    def run(self):
        """Start the scanner running the consumer client of the RabbitMQ server."""

        try:
            self.consumer.run()
        except KeyboardInterrupt:
            self.consumer.stop()

    def on_message(self, json_image):
        """Callback run for every message received by the RabbitMQ consumer.

        The message is turned into an ``Image`` and processed with
        ``process_repo_name``. Processing is tried up to three times; every
        failure is logged and followed by the next try.

        Args:
            json_image: the received message, passed to ``Image`` as
                ``from_dict_image``.

        Returns:
            True if one of the tries succeeded, False if all three failed.
        """
        image = Image(from_dict_image=json_image)

        self.logger.info("Received Image {}".format(image))

        max_attempts = 3
        for attempt in range(1, max_attempts + 1):
            try:
                self.logger.info("[{}] start scan".format(image.name))
                self.process_repo_name(image)
                return True
            except (docker.errors.APIError, docker.errors.NotFound) as e:
                self.logger.error(str(e) + ": retry number " + str(attempt))
            except Exception as e:
                # Any other failure is logged with its exception type and
                # retried the same way.
                self.logger.error(" {} excpetion: {}".format(
                    type(e).__name__, str(e)))

        # Report the number of tries really made; the counter used to be
        # one too high and the message lacked a space before "attempt".
        self.logger.warning("{} - PURGER from the queue after {} "
                            "attempt".format(image.name, max_attempts))
        return False

    def process_repo_name(self, image):
        """
        Process a single image.
        It checks if an image must Scanned or it is already updated.
        """
        self.logger.debug("[{}] process repo".format(image.name))
        if self.client_images.is_new(image.name):
            self.logger.info("[{}] is new into local database".format(
                image.name))
            self.scan(image)
            self.client_images.post_image(image.to_dict())
            self.logger.info(
                "[{}] - added to images server succesfully".format(image.name))

        # TODO: Non chiama Docker Hub ???  Ma chiama le images servers per capire se image must be scan again
        elif self.client_images.must_scanned(image.name, image.last_updated):
            self.logger.info(
                "[{}] is present into images server but must be scan again".
                format(image.name))
            self.scan(image)
            #self.logger.info("[{}] - PUT to images server".format(image.name))
            self.client_images.put_image(image.to_dict())  # PUT the new image
            self.logger.info(
                "[{}] - updated into images server succesfully".format(
                    image.name))
        else:
            self.logger.info("[{}] - uptodate into images server".format(
                image.name))

    def scan(self, image):
        """
        It scans an image and create the new Docker finder description.
        """
        # repo_name = image.repo_name
        # tag = image.tag
        # repo_name_tag = image.name

        self.logger.info("[{}] pulling the image ...".format(image.name))

        try:
            img = self.client_daemon.images.pull(image.name)

            self.logger.debug('[{0}] start scanning'.format(image.name))

            # search software versions and system commands
            self.logger.info('[{0}] extracing softwares versions ...'.format(
                image.name))
            self.info_dofinder(image)

            # add informatiom from the inspect command
            self.logger.info('[{0}] adding docker inspect info....'.format(
                image.name))
            self.info_inspect(image)

            self.logger.info('[{0}] finish scanning'.format(image.name))
            image.last_scan = str(datetime.datetime.now())

            # set updated time
            image.set_updated()

            if self.rmi:
                self.client_daemon.images.remove(image.name, force=True)
                self.logger.info('[{0}] removed image'.format(image.name))
        except (docker.errors.APIError, docker.errors.NotFound) as e:
            self.logger.error(str(e))
            self.client_daemon.images.remove(image.name, force=True)

        return image

    def info_dofinder(self, image):
        """
         Extracts the OS distribution and the software versions in the image
        """
        name = image.name

        self.logger.debug('[{}] searching software ... '.format(name))

        # create the container
        entrypoint = "sleep 1000000000"  # "ping 127.0.0.1" | ping -i 10000 127.0.0.1"
        self.logger.debug("[{}] creating container with entrypoint ={}".format(
            name, entrypoint))

        try:
            container = self.client_daemon.containers.create(
                image=name, entrypoint=entrypoint)

            # start the container with sleep
            container.start()
            image.softwares = self._extract_softwares(container)
            image.distro = self._extract_distribution(container)
            container.stop(timeout=1)

            container.remove(v=True)
        except (docker.errors.ImageNotFound, docker.errors.APIError) as e:
            container.remove(v=True, force=True)
            self.logger.error(str(e))
            raise
        # # remove the contatiner
        # self.client_daemon.remove_container(container_id, force=True, v=True)
        # self.logger.info('[{}] : found {} softwares [{}] '.format(
        #     image.name, len(softwares), softwares))

    def _extract_distribution(self, container):
        for command, regex in self.client_software.get_system():
            res = container.exec_run(cmd=command)
            output = res.decode()
            prog = re.compile(regex)
            match = prog.search(output)
            if match:
                distro = match.group(0)
                self.logger.info("{0} found.".format(distro))
                return distro
            else:
                self.logger.debug("[{0}] NOT found in ".format(command))
        return None

    def _extract_softwares(self, container):
        # list of software distributions found in the image.
        softwares = []
        for sw in self.client_software.get_software():
            # "name":"python", "cmd":"--version", "regex":"[0-9]+\\.[0-9]+(\\.[0-9]+)*"
            command = sw['name'] + " " + sw['cmd']
            res = container.exec_run(cmd=command)
            output = res.decode()
            prog = re.compile(sw['regex'])
            match = prog.search(output)
            if match:
                version = match.group(0)
                if version != "." or version != ".go":
                    softwares.append({'software': sw['name'], 'ver': version})
                    self.logger.debug("{0} {1} found.".format(
                        sw['name'], version))
            else:
                self.logger.debug("[{0}] NOT found in ".format(sw['name']))
        self.logger.info("{} software found".format(len(softwares)))
        self.logger.debug('Software found: [' +
                          ''.join('{} {},'.format(s['software'], s['ver'])
                                  for s in softwares) + "]")
        return softwares

    def info_inspect(self, image):
        """Attach a trimmed ``docker inspect`` result to the image.

        The image is inspected through a low-level ``docker.APIClient``
        (the original author notes that the docker 2.0 high-level client
        offers no ``inspect_image``). Only a fixed set of keys is copied
        into ``image.inspect_info``; keys missing from the inspect result
        are skipped. Example of the resulting shape::

            {"Id": "sha256:702f...", "RepoTags": [], "RepoDigests": [],
             "Parent": "", "DockerVersion": "17.06.1-ce",
             "Size": 374134402, "GraphDriver": {}, "RootFS": {},
             "VirtualSize": 374134402, "Architecture": "amd64",
             "Os": "linux"}

        Args:
            image: object exposing ``name``; receives ``inspect_info``.
        """
        self.logger.debug('[{}] $docker inspect <image>'.format(image.name))

        low_level_client = docker.APIClient(
            base_url='unix://var/run/docker.sock')
        inspected = low_level_client.inspect_image(image.name)

        # Keys of the inspect result that are kept.
        kept_keys = (
            'Id', 'RepoTags', 'RepoDigests', "Parent", "DockerVersion", "Size",
            "GraphDriver", "RootFS", "VirtualSize", "Architecture", "Os"
        )
        image.inspect_info = {
            key: inspected[key] for key in kept_keys if key in inspected
        }

    def version_from_regex(self, container_id, command, regex):
        try:
            output = self.run_command(container_id, command)
            self.logger.info(regex)
            p = re.compile(regex)
            match = p.search(output)
            if match:
                version = match.group(0)
                if version != "." or version != ".go":
                    self.logger.debug("[{0}] found in {1}".format(
                        command, container_id))
                    return version
                else:
                    return None
            else:
                self.logger.debug("[{0}] NOT found in {1}".format(
                    command, container_id))
                return None
        except docker.errors.NotFound as e:
            self.logger.debug(command + " not found")

    def run_command(self, container_id, command):
        """Just like 'docker run CMD'.
            Return the output of the command.i
        """

        self.logger.debug("[{0}] running command {1}".format(
            container_id, command))

        created_exec = self.client_daemon.exec_create(container_id,
                                                      cmd=command)

        output = self.client_daemon.exec_start(created_exec['Id'])
        return output.decode()