Exemplo n.º 1
0
    def __init__(self, args):
        """Set up configuration, working directory, and scan state.

        args -- parsed CLI arguments; forwarded to
        ApplicationConfiguration and used to size the thread pool.
        """
        self.args = args
        self.ac = ApplicationConfiguration(parserargs=args)
        # Worker-thread count derived from the --number argument.
        self.procs = self.set_procs(args.number)
        if not os.path.exists(self.ac.workdir):
            os.makedirs(self.ac.workdir)
        self.cs = ContainerSearch()
        self.output = Reporter()
        # Downloaded RHSA CVE data (XML and its bz2 archive) are kept
        # in the working directory.
        self.cve_file = os.path.join(self.ac.workdir,
                                     "com.redhat.rhsa-all.xml")
        self.cve_file_bz = os.path.join(self.ac.workdir,
                                        "com.redhat.rhsa-all.xml.bz2")

        # Populated later: list of ids scanned, and per-image rpm lists.
        self.scan_list = None
        self.rpms = {}
Exemplo n.º 2
0
class Worker(object):
    """Drive CVE scans of docker images/containers and report results."""

    # Bounds applied by set_procs() to the worker-thread count.
    min_procs = 2
    max_procs = 4

    def __init__(self, args):
        """Set up configuration, scratch directory, and scan state.

        args -- parsed CLI arguments; forwarded to
        ApplicationConfiguration and used to size the thread pool.
        """
        self.args = args
        self.ac = ApplicationConfiguration(parserargs=args)
        self.procs = self.set_procs(args.number)

        # Make sure the scratch directory exists before anything tries
        # to write CVE data into it.
        workdir = self.ac.workdir
        if not os.path.exists(workdir):
            os.makedirs(workdir)

        self.cs = ContainerSearch()
        self.output = Reporter()

        # Master RHSA CVE file and its bz2 download live in workdir.
        self.cve_file = os.path.join(workdir, "com.redhat.rhsa-all.xml")
        self.cve_file_bz = os.path.join(workdir,
                                        "com.redhat.rhsa-all.xml.bz2")

        # Filled in by _do_work() / search_containers().
        self.scan_list = None
        self.rpms = {}

    def set_procs(self, number):

        numThreads = psutil.NUM_CPUS if number is None else number

        if numThreads < self.min_procs:
            if self.ac.number is not None:
                print "The image-scanner requires --number to be a minimum " \
                      "of {0}. Setting --number to {1}".format(self.min_procs,
                                                               self.min_procs)
            return self.min_procs
        elif numThreads <= self.max_procs:
            return numThreads
        else:
            if self.ac.number is not None:
                print "Due to docker issues, we limit the max number "\
                      "of threads to {0}. Setting --number to "\
                      "{1}".format(self.max_procs, self.max_procs)
            return self.max_procs

    def _get_cids_for_image(self, cs, image):
        cids = []

        if image in cs.fcons:
            for container in cs.fcons[image]:
                cids.append(container['uuid'])
        else:
            for iid in cs.fcons:
                cids = [con['uuid'] for con in cs.fcons[iid]]
                if image in cids:
                    return cids

        return cids

    def get_cve_data(self):
        """Download the bz2-compressed RHSA CVE definitions from Red Hat
        into self.cve_file_bz.

        Raises ImageScannerClientError when the download fails.
        """
        hdr = {'User-agent': 'Mozilla/5.0'}
        url = ("http://www.redhat.com/security/data/oval/"
               "com.redhat.rhsa-all.xml.bz2")

        self.ac._print("Obtaining CVE file data from {0}".format(url))

        request = urllib2.Request(url, "", hdr)
        try:
            resp = urllib2.urlopen(request)
        except Exception:
            # Narrowed from a bare except: so SystemExit and
            # KeyboardInterrupt still propagate.
            raise ImageScannerClientError("Unable to fetch CVE data from {0}"
                                          .format(url))
        # Binary mode for the bz2 payload; context manager closes the file.
        with open(self.cve_file_bz, "wb") as fh:
            fh.write(resp.read())

    def extract_cve_data(self):
        # Extract the XML bz
        bzfile = bz2.BZ2File(self.cve_file_bz)
        cve_data = bzfile.read()
        open(self.cve_file, 'wb').write(cve_data)

    def return_active_threadnames(self, threads):
        thread_names = []
        for thread in threads:
            thread_name = thread._Thread__name
            if thread_name is not "MainThread":
                thread_names.append(thread_name)

        return thread_names

    def onlyactive(self):
        ''' This function sorts of out only the active containers'''
        con_list = []
        # Rid ourselves of 0 size containers
        for container in self.cs.active_containers:
            con_list.append(container['Id'])
        if len(con_list) == 0:
            error = "There are no active containers on this system"
            if not self.ac.api:
                print error
                sys.exit(1)
            else:
                raise ImageScannerClientError(error)
        else:
            self._do_work(con_list)

    def allimages(self):
        if len(self.cs.imagelist) == 0:
            error = "There are no images on this system"
            if not self.ac.api:
                print error
                sys.exit(1)
            else:
                raise ImageScannerClientError(error)
        if self.args.allimages:
            self._do_work(self.cs.allimagelist)
        else:
            self._do_work(self.cs.imagelist)

    def list_of_images(self, image_list):
        """Scan an explicit list of image/container ids (already
        validated by _check_input)."""
        self._do_work(image_list)

    def allcontainers(self):
        if len(self.cs.cons) == 0:
            error = "There are no containers on this system"
            if not self.ac.api:
                print error
                sys.exit(1)
            else:
                raise ImageScannerClientError(error)
        else:
            con_list = []
            for con in self.cs.cons:
                con_list.append(con['Id'])
            self._do_work(con_list)

    def _do_work(self, image_list):
        """Refresh CVE data if stale, then scan each id in image_list on
        a bounded pool of threads and print a progress bar + summary.
        """
        self.scan_list = image_list
        cp = CVEParse(self.ac.workdir)
        # Re-download when the XML is missing, caching is disabled, or
        # the cached copy is more than 12 hours old.
        if (not os.path.exists(cp.xmlf)) or \
                (self.ac.nocache) or \
                ((time.time() - os.path.getmtime(cp.xmlf)) / (60 ** 2) > 12):
            # If we find a tarball of the dist break outs and
            # it is less than 12 hours old, use it to speed things
            # up

            self.get_cve_data()
            self.extract_cve_data()

            self.ac._print("Splitting master XML file into distribution "
                           "specific XML files")

            # Run dist breakout to make dist specific XML
            # files
            t = timeit.Timer(cp.parse_for_platform).timeit(number=1)
            logging.debug("Parsed distribution breakup in "
                          "{0} seconds".format(t))
        self.ac._print("\nBegin processing\n")
        threads = []

        # Build one (not-yet-started) thread per item to scan; refuse to
        # scan dead containers outright.
        for image in image_list:
            if image in self.cs.dead_cids:
                raise ImageScannerClientError("Scan not completed. Cannot "
                                              "scan the dead "
                                              "container {0}".format(image))
            cids = self._get_cids_for_image(self.cs, image)
            t = threading.Thread(target=self.search_containers, name=image,
                                 args=(image, cids, self.output,))
            threads.append(t)

        logging.info("Number of containers to scan: {0}".format(len(threads)))
        total_images = len(threads)
        # Signal handlers may only be installed from the main thread.
        if isinstance(threading.current_thread(), threading._MainThread):
            signal.signal(signal.SIGINT, self.signal_handler)
        self.threads_complete = 0
        self.ac._print("")
        # NOTE(review): this dispatch loop busy-spins (no sleep) until
        # all threads are started; threading.enumerate() counts every
        # live thread, so concurrency is bounded by self.procs.
        while len(threads) > 0:
            if len(threading.enumerate()) < self.procs:
                new_thread = threads.pop()
                new_thread.start()
                self._progress(float(self.threads_complete),
                               float(total_images))
        # Seeing some weirdness with the exit thread count
        # when using the API, depends on how it is called

        # NOTE(review): both branches assign the same value; the api
        # distinction appears vestigial -- confirm before simplifying.
        if self.ac.api:
            exit_thread_count = 1
        else:
            exit_thread_count = 1

        # Wait (polling once a second) for all scan threads to finish.
        while len(threading.enumerate()) > exit_thread_count:
            self._progress(float(self.threads_complete), float(total_images))
            time.sleep(1)
            pass

        self._progress(float(self.threads_complete), float(total_images))
        self.ac._print("\n" * 2)
        self.output.report_summary()

    def _progress(self, complete, total):
        if not self.ac.api:
            sys.stdout.write("\r[{0:20s}] {1}%    {2}/{3}"
                             .format('#' * int(complete / total * 20),
                                     int(complete / total * 100),
                                     int(complete), int(total)))
            sys.stdout.flush()

    def signal_handler(self, signal, frame):
        print "\n\nExiting..."
        sys.exit(0)

    def search_containers(self, image, cids, output):
        """Thread target: scan one image/container and record results.

        image -- id of the image or container being scanned
        cids -- container ids associated with the image
        output -- shared Reporter collecting per-scan results
        """
        f = Scan(image, cids, output)
        try:
            if f.get_release():

                # RHEL-based content: time the scan and report steps.
                t = timeit.Timer(f.scan).timeit(number=1)
                logging.debug("Scanned chroot for image {0}"
                              " completed in {1} seconds"
                              .format(image, t))
                timeit.Timer(f.report_results).timeit(number=1)
                image_rpms = f._get_rpms()
                self.rpms[image] = image_rpms
            else:
                # This is not a RHEL image or container
                f._report_not_rhel(image)
        except subprocess.CalledProcessError:
            # Best-effort: a failed subprocess aborts this scan only.
            pass

        # Always tear down the temporary chroot, even after a failure.
        start = time.time()
        f.DM.cleanup(f.dm_results)
        logging.debug("Removing temporary chroot for image {0} completed in"
                      " {1} seconds".format(image, time.time() - start))
        # NOTE(review): += is not atomic; multiple scan threads update
        # this counter and rely on the GIL -- confirm acceptable.
        self.threads_complete += 1

    def _check_input(self, image_list):
        '''
        Takes a list of image ids, image-names, container ids, or
        container-names and returns a list of images ids and
        container ids
        '''
        dm = DockerMount(dockerclient=self.ac.conn)
        work_list = []
        # Verify each input resolves to a known image/container id.
        try:
            for image in image_list:
                iid, _ = dm.get_iid(image)
                work_list.append(iid)
        except DockerMountError:
            # `image` is the entry that failed to resolve.
            error = "Unable to associate {0} with any image " \
                    "or container".format(image)
            if not self.ac.api:
                print(error)
                sys.exit(1)
            else:
                raise ImageScannerClientError(error)
        return work_list

    def start_application(self):
        start_time = time.time()
        logging.basicConfig(filename=self.ac.logfile,
                            format='%(asctime)s %(levelname)-8s %(message)s',
                            datefmt='%m-%d %H:%M', level=logging.DEBUG)
        try:
            if self.args.onlyactive:
                self.onlyactive()
            if self.args.allcontainers:
                self.allcontainers()
            if self.args.allimages or self.args.images:
                self.allimages()
            if self.args.scan:
                # Check to make sure we have valid input
                image_list = self._check_input(self.args.scan)
                self.list_of_images(image_list)

        except ImageScannerClientError as scan_error:
            if not self.ac.api:
                print scan_error
                sys.exit(1)
            else:
                return {'Error': str(scan_error)}, None
        end_time = time.time()
        duration = (end_time - start_time)
        if duration < 60:
            unit = "seconds"
        else:
            unit = "minutes"
            duration = duration / 60
        logging.info("Completed entire scan in {0} {1}".format(duration, unit))
        if self.ac.api:
            self.dump_json_log()
            return self.ac.return_json, self.ac.json_url

    def _get_rpms_by_obj(self, docker_obj):
        """Return the rpm list cached by search_containers() for
        docker_obj (raises KeyError if it was never scanned)."""
        return self.rpms[docker_obj]

    def dump_json_log(self):
        '''
        Creates a log of information about the scan and what was
        scanned for post-scan analysis
        '''

        xmlp = Create_Summary()

        # Common Information
        json_log = {}
        json_log['hostname'] = platform.node()
        json_log['scan_time'] = datetime.today().isoformat(' ')
        json_log['scanned_content'] = self.scan_list
        json_log['host_results'] = {}
        json_log['docker_state'] = self.ac.fcons
        json_log['host_images'] = [image['Id'] for image in self.ac.allimages]
        json_log['host_containers'] = [con['Id'] for con in self.ac.cons]
        json_log['docker_state_url'] = self.ac.json_url

        # Copy selected CLI options into the log (None when absent).
        tuple_keys = ['rest_host', 'rest_port', 'allcontainers',
                      'allimages', 'images', 'logfile', 'number',
                      'reportdir', 'workdir', 'api', 'url_root',
                      'host']
        for tuple_key in tuple_keys:
            json_log[tuple_key] = getattr(self.ac.parserargs, tuple_key, None)

        # Per scanned obj information: a 'msg' key in the per-object
        # results marks non-RHEL content.
        for docker_obj in self.scan_list:
            json_log['host_results'][docker_obj] = {}
            tmp_obj = json_log['host_results'][docker_obj]
            if 'msg' in self.ac.return_json[docker_obj]:
                tmp_obj['isRHEL'] = False
            else:
                tmp_obj['rpms'] = self._get_rpms_by_obj(docker_obj)
                tmp_obj['isRHEL'] = True
                xml_path = self.ac.return_json[docker_obj]['xml_path']
                tmp_obj['cve_summary'] = \
                    xmlp._summarize_docker_object(xml_path,
                                                  json_log, docker_obj)

        # Pulling out good stuff from summary by docker object
        # (.items() replaces the Python-2-only .iteritems()).
        for docker_obj in self.ac.return_json:
            if 'msg' not in self.ac.return_json[docker_obj]:
                for key, value in self.ac.return_json[docker_obj].items():
                    json_log['host_results'][docker_obj][key] = value

        with open(self.ac.docker_state, 'w') as state_file:
            json.dump(json_log, state_file)