Example #1
def s2e_concolic_testcase_watcher(seedbox, testcases):
    setproctitle.setproctitle('S2E test case watcher')

    processed = set()
    count = 0

    i = adapters.InotifyTree(testcases, mask=IN_CLOSE_WRITE)
    for event in i.event_gen():
        # event_gen() periodically yields None when no events are pending; skip those
        if event is None:
            continue

        (_, _, path, filename) = event
        full_name = os.path.join(path, filename)

        md5 = md5sum(full_name)
        if md5 in processed:
            continue
        processed.add(md5)
        count += 1

        dst_file = 'id:{:06d},{}'.format(count, filename)
        dst = '{}/{}'.format(seedbox, dst_file)
        shutil.copyfile(full_name, dst)
        print '[{}] -> [{}]'.format(filename, dst_file)
Example #2
import sys
from os import path

import six
from inotify import adapters, constants


def wait_for_dir(watched_path):
    watched_path = six.binary_type(watched_path.encode('utf-8'))
    i = adapters.InotifyTree(watched_path, mask=constants.IN_CLOSE_WRITE)
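    # event_gen() yields None when no event is pending, otherwise a
    # (header, type_names, watch_path, filename) tuple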
    for event in i.event_gen():
        if event is not None:
            filename = path.basename(event[3])
            dir_name = event[2]
            if filename.startswith(b'.#') or filename.startswith(
                    b'flycheck') or filename.endswith(b"~"):
                continue
            elif dir_name.endswith(b'.cache/v/cache'):
                continue
            else:
                print(event)
                sys.exit(0)
Example #3
def s2e_expdata_watcher(argv):
    ''' replace S2E expdata files with /dev/null symlink '''
    setproctitle('S2E expdata handler')

    watch_dir = '{}/expdata'.format(argv['out_s2e'])
    tmp_dir = argv['out_s2e']

    i = adapters.InotifyTree(watch_dir, mask=IN_CREATE)
    for event in i.event_gen():
        if event is None:
            continue

        (header, _, path, filename) = event

        if header.mask & IN_ISDIR:
            continue

        # atomically replace the newly created file with a symlink to /dev/null
        tmplink = '{}/{}_tmp'.format(tmp_dir, filename)
        os.symlink(devnull, tmplink)
        os.rename(tmplink, os.path.join(path, filename))
Example #4
def s2e_concolic_testcase_watcher(argv):
    setproctitle('S2E test case watcher')

    watch_dir = '{}/testcases'.format(argv['out_s2e'])
    db_config = argv['db_config']
    seedbox = argv['seedbox']
    processed = set()
    count = 0

    def process_input(path, filename, count):
        setproctitle('S2E test case watcher: processing [{}]'.format(filename))
        # 1. store the test case (content and md5) in the file table; the
        # interested table is updated below for concolic-generated cases
        src = os.path.join(path, filename)
        try:
            with open(src, 'rb') as f_src:
                src_content = f_src.read()
            md5 = md5sum(src)
        except Exception as excp:
            print getattr(excp, 'message', repr(excp))
            return

        conn = psycopg2.connect(host=db_config['host'],
                                port=db_config['port'],
                                database=db_config['database'],
                                user=db_config['user'],
                                password=db_config['password'])
        cur = conn.cursor()

        id_file = 0
        try:
            query = ('insert into file (hash, file) '
                     'values (%s, %s) on conflict do nothing returning id;')
            cur.execute(query, (md5, psycopg2.Binary(src_content)))
            conn.commit()
            id_file = cur.fetchone()
        except KeyboardInterrupt:
            raise
        except psycopg2.IntegrityError as excp:
            print getattr(excp, 'message', repr(excp))
            conn.rollback()
        except Exception as excp:
            print getattr(excp, 'message', repr(excp))
            conn.rollback()

        # only update interested table if the testcase is generated by concolic mode
        if not filename.startswith('testcase'):
            # l_fn = ['s2e', 'input', idx_input, idx_interested, idx_basic_block, md5_short]
            l_fn = filename.split('_')
            if cur.description is None or id_file is None:
                query = 'select id from file where hash = %s;'
                cur.execute(query, (md5, ))
                id_file = cur.fetchone()

            if id_file is not None:
                query = ('update interested set '
                         'idfile = %s, '
                         'uri = %s, '
                         'update_time=now(), '
                         'status = 3 '
                         'where id = %s returning id;')
                cur.execute(query, (id_file[0], src, l_fn[3]))
                rowcount = cur.rowcount

                if rowcount != 1:
                    conn.rollback()
                else:
                    conn.commit()

        conn.close()

        # copy file from s2e test case output directory to AFL seedbox
        if not filename.startswith('testcase'):
            dst_file = 'id:{:06d},{},{},{},{}'.format(count, l_fn[2], l_fn[3],
                                                      l_fn[4], l_fn[5])
            dst = '{}/{}'.format(seedbox, dst_file)
        else:
            dst_file = 'id:{:06d},{}'.format(count, filename)
            dst = '{}/{}'.format(seedbox, dst_file)

        # lastly copy file to seedbox
        print '[W][S2E]: [{}] -> [{}]'.format(basename(src), basename(dst))
        shutil.copyfile(src, dst)

    i = adapters.InotifyTree(watch_dir, mask=IN_CLOSE_WRITE)
    for event in i.event_gen():
        # auto join child process
        active_children()

        if event is None:
            continue

        (_, _, path, filename) = event

        md5 = md5sum(os.path.join(path, filename))
        if md5 in processed:
            continue
        processed.add(md5)

        count += 1
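        # hand each new test case to its own child process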
        p_input = Process(target=process_input, args=[path, filename, count])
        p_input.start()
Example #5
def afl_watcher(argv):
    setproctitle('AFL test case watcher')

    def process_input(path, filename):
        setproctitle('analyzing [{}:{}]'.format(basename(dirname(path)),
                                                filename))

        db_config = argv['db_config']
        project_id = argv['project_id']
        qemu = argv['qemu']
        basedir = argv['basedir']

        analyzer = DynamicAnalyzer(db_config=db_config,
                                   qemu=qemu,
                                   basedir=basedir,
                                   project_id=project_id)
        analyzer.analyze_dynamic(os.path.join(path, filename))

    def process_stats(path, filename):
        setproctitle('Processing fuzzer_stats for node {}'.format(
            basename(path)))

        db_config = argv['db_config']
        project_id = argv['project_id']

        conn = psycopg2.connect(host=db_config['host'],
                                port=db_config['port'],
                                database=db_config['database'],
                                user=db_config['user'],
                                password=db_config['password'])
        cur = conn.cursor()

        with open(os.path.join(path, filename)) as f_stats:
            content = f_stats.readlines()

        stats_dict = dict(
            map(str.strip, line.split(':', 1)) for line in content)

        stats_dict['idproject'] = project_id

        try:
            columns = stats_dict.keys()
            values = [stats_dict[column] for column in columns]
            exclude_columns = ['EXCLUDED.' + column for column in columns]

            query = ('insert into afl_stats '
                     '(%s) values %s '
                     'on conflict (idproject, afl_banner) DO UPDATE SET '
                     '(%s) = (%s) '
                     'returning id;')

            cur.execute(
                query,
                (AsIs(', '.join(columns)), tuple(values),
                 AsIs(', '.join(columns)), AsIs(', '.join(exclude_columns))))

            conn.commit()
            id_file = cur.fetchone()
        except psycopg2.IntegrityError as excp:
            print getattr(excp, 'message', repr(excp))
            conn.rollback()
        except Exception as excp:
            print getattr(excp, 'message', repr(excp))
            conn.rollback()

        conn.close()

    def worker(queue):
        setproctitle('AFL dynamic analyze dispatcher')
        processes = []
        MAX_PROCESSES = 20

        while True:
            try:
                # block until the number of live child processes drops below MAX_PROCESSES
                while active_children():
                    processes[:] = [p for p in processes if p.is_alive()]
                    if len(processes) < MAX_PROCESSES:
                        break
                    time.sleep(0.1)

                path, filename = queue.get()

                p_input = Process(target=process_input, args=[path, filename])
                p_input.start()
                processes.append(p_input)
            except KeyboardInterrupt:
                raise
            except Exception as excp:
                print getattr(excp, 'message', repr(excp))
                continue

    watch_dir = argv['out_afl']
    max_testcase_size = argv.get('max_testcase_size', 1024 * 1024 * 50)
    queue = Queue()
    processed = set()
    tracker = set()
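    # processed: md5 hashes of test cases already handled
    # tracker: (path, filename) pairs seen once, used to deduplicate the
    # doubled IN_CLOSE_WRITE events (see filter #4 below)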

    p_worker = Process(target=worker, args=[queue])
    p_worker.start()

    i = adapters.InotifyTree(watch_dir, mask=IN_CLOSE_WRITE)
    for event in i.event_gen():
        if event is None:
            continue

        (_, _, path, filename) = event

        # filter #1 (most common case): the event is not inside a queue directory
        dir_base = basename(path)
        if dir_base != 'queue':
            # it may be the fuzzer statistics file instead
            if filename == 'fuzzer_stats':
                p_stats = Process(target=process_stats, args=[path, filename])
                p_stats.start()
            continue

        # filter #2, there is a subdirectory inside queue
        if not filename.startswith('id:'):
            continue

        # filter #3, do not analyze seedbox
        node = basename(dirname(path))
        if node == 'seedbox':
            continue

        # filter #4: for most test cases AFL creates, the IN_CLOSE_WRITE event
        # fires twice; work around this by only handling every second
        # occurrence.
        if (path, filename) not in tracker:
            tracker.add((path, filename))
            continue
        tracker.remove((path, filename))

        current = []
        # XXX the tracker set keeps growing, so once it exceeds 100 records
        # drain it and try to put its entries into the queue for processing
        if len(tracker) > 100:
            while tracker:
                current.append(tracker.pop())

        # the file from the current event is always queued once we reach this point
        current.append((path, filename))

        for c_path, c_filename in current:
            # filter #5: different nodes can generate test cases with the same hash
            md5 = md5sum(os.path.join(c_path, c_filename))
            if md5 in processed:
                continue
            processed.add(md5)

            f_size = os.stat(os.path.join(c_path, c_filename)).st_size
            if f_size > max_testcase_size:
                print 'TEST CASE FILE SIZE TOO LARGE FOR FILE: '
                print '{}:{} ({}) NOT SYNCED INTO DATABASE'.format(
                    node, c_filename, f_size)
                continue

            queue.put((c_path, c_filename))

            print '[W][AFL][{}][{: >8}]: [{}] {}'.format(
                len(processed), basename(dirname(c_path)), c_filename, md5)
        active_children()
Example #6
    def run(self, data, store, signal, context, **kwargs):
        """ The main run method of the NotifyTriggerTask task.

        Args:
            data (MultiTaskData): The data object that has been passed from the
                                  predecessor task.
            store (DataStoreDocument): The persistent data store object that allows the
                                       task to store data for access across the current
                                       workflow run.
            signal (TaskSignal): The signal object for tasks. It wraps the construction
                                 and sending of signals into easy to use methods.
            context (TaskContext): The context in which the tasks runs.

        Raises:
            LightflowFilesystemPathError: If the specified path is not absolute.
        """
        params = self.params.eval(data, store)

        # build notification mask
        on_file_create = constants.IN_CREATE if params.on_file_create else 0x00000000
        on_file_close = constants.IN_CLOSE_WRITE if params.on_file_close else 0x00000000
        on_file_delete = constants.IN_DELETE if params.on_file_delete else 0x00000000
        on_file_move = constants.IN_MOVE if params.on_file_move else 0x00000000
        mask = (on_file_create | on_file_close | on_file_delete | on_file_move)

        if not os.path.isabs(params.path):
            raise LightflowFilesystemPathError(
                'The specified path is not an absolute path')

        if params.recursive:
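            # watch the whole directory tree; events are filtered against the
            # configured mask further below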
            notify = adapters.InotifyTree(params.path.encode('utf-8'))
        else:
            notify = adapters.Inotify()
            notify.add_watch(params.path.encode('utf-8'))

        # setup regex
        if isinstance(params.exclude_mask, str):
            regex = re.compile(params.exclude_mask)
        else:
            regex = None

        # if requested, pre-fill the file list with existing files
        files = []
        if params.use_existing:
            for (dir_path, dir_names, filenames) in os.walk(params.path):
                files.extend([os.path.join(dir_path, filename) for filename in filenames])
                if not params.recursive:
                    break

            if regex is not None:
                files = [file for file in files if regex.search(file) is None]

            if params.flush_existing and len(files) > 0:
                if self._callback is not None:
                    self._callback(files, data, store, signal, context)
                del files[:]

        polling_event_number = 0
        try:
            for event in notify.event_gen():
                if params.event_trigger_time is not None:
                    time.sleep(params.event_trigger_time)

                # check the stop signal every stop_polling_rate events
                polling_event_number += 1
                if polling_event_number > params.stop_polling_rate:
                    polling_event_number = 0
                    if signal.is_stopped:
                        break

                # if an event arrived, check whether it matches the mask and collect the file
                if event is not None:
                    (header, type_names, watch_path, filename) = event

                    if (not header.mask & constants.IN_ISDIR) and\
                            (header.mask & mask):
                        new_file = os.path.join(watch_path.decode('utf-8'),
                                                filename.decode('utf-8'))

                        add_file = not params.skip_duplicate or \
                            (params.skip_duplicate and new_file not in files)

                        if add_file and regex is not None:
                            add_file = regex.search(new_file) is None

                        if add_file:
                            files.append(new_file)

                # as soon as enough files have been aggregated call the sub dag
                if len(files) >= params.aggregate:
                    chunks = len(files) // params.aggregate
                    for i in range(0, chunks):
                        if self._callback is not None:
                            self._callback(files[0:params.aggregate], data,
                                           store, signal, context)
                        del files[0:params.aggregate]

        finally:
            if not params.recursive:
                notify.remove_watch(params.path.encode('utf-8'))

        return Action(data)
Example #7
    def bblog_watcher(self, bb_exec_q):
        ''' watcher for log files that contain the basic blocks covered by each S2E execution '''
        setproctitle('S2E basic block coverage log handler')
        paths_bblog = '{}/output_s2e/BBLog'.format(self._basedir)
        check_dir(paths_bblog)
        processed = set()

        def tail_with_pid(full_path, pid):
            ''' simple tail implementation '''
            interval = 1.0
            f_log = open(full_path)
            while True:
                try:
                    where = f_log.tell()
                    lines = f_log.readlines()
                    if not lines:
                        time.sleep(interval)
                        f_log.seek(where)
                    else:
                        yield lines

                    # break the loop if process no longer exists
                    if not psutil.pid_exists(pid):
                        break
                except IOError:
                    yield ''
                except KeyboardInterrupt:
                    raise

        def process_log(path, filename):
            ''' process the basic block coverage log from S2E execution '''
            full_path = os.path.join(path, filename)

            try:
                pid = int(filename.split('_')[0])
            except ValueError:
                print 'extract pid from ' + filename + ' failed.'
                return

            for lines in tail_with_pid(full_path, pid):
                for line in lines:
                    try:
                        bb_exec_q.put(int(line.rstrip()))
                    except ValueError:
                        print 'convert basic block id failed. {}'.format(line.rstrip())
                    except Full:
                        print 'put value into queue failed, queue is full'
                    except Exception:
                        raise

        i = adapters.InotifyTree(paths_bblog, mask=IN_CREATE)
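        # tail every newly created basic block log in its own child process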
        for event in i.event_gen():
            active_children()
            if event is None:
                continue

            (_, _, path, filename) = event
            if filename in processed:
                continue
            processed.add(filename)

            p_event = Process(target=process_log, args=[path, filename])
            p_event.start()