Example #1
    def setUp(self):
        if os.path.exists(self.data_dir):
            shutil.rmtree(self.data_dir)

        mkdir_p(self.data_dir)
        self.storage = Storage(data_dir=self.data_dir)
        self.basic_setup = self._basic_setup()
        self.storage.create(*self.basic_setup)

        metric_name = self.basic_setup[0]
        self.path = self.storage.gen_path(self.data_dir, metric_name)
Example #2
    def setUp(self):
        if os.path.exists(self.data_dir):
            shutil.rmtree(self.data_dir)

        mkdir_p(self.data_dir)
        self.storage = Storage(data_dir=self.data_dir)
        self.basic_setup = self._basic_setup()
        self.storage.create(*self.basic_setup)

        metric_name = self.basic_setup[0]
        self.path = self.storage.gen_path(self.data_dir, metric_name)
        tag_list = self.basic_setup[1]
        self.null_point = (None,) * len(tag_list)
Example #3
def merge_metrics(meta, metric_paths, metric_names, output_file):
    ''' Merge metrics to a kenshin file.
    '''
    # Get content (data points grouped by archive) of each metric.
    metrics_archives_points = [
        get_metric_content(path, metric)
        for (path, metric) in zip(metric_paths, metric_names)
    ]

    # Merge metrics to a kenshin file
    with open(output_file, 'wb') as f:
        archives = meta['archive_list']
        archive_info = [(archive['sec_per_point'], archive['count'])
                        for archive in archives]
        inter_tag_list = metric_names + ['']  # append an empty tag to reserve space

        # header
        packed_kenshin_header = Storage.pack_header(
            inter_tag_list,
            archive_info,
            meta['x_files_factor'],
            Agg.get_agg_name(meta['agg_id']),
            )[0]
        f.write(packed_kenshin_header)

        for i, archive in enumerate(archives):
            archive_points = [x[i] for x in metrics_archives_points]
            merged_points = merge_points(archive_points)
            points = fill_gap(merged_points, archive, len(meta['tag_list']))
            packed_str = packed_kenshin_points(points)
            f.write(packed_str)
Example #4
def get_metric_content(metric_path, metric_name):
    ''' Return data points of each archive of the metric.
    '''
    conn = urllib.urlopen(metric_path)
    if conn.code == 200:
        content = conn.read()
    else:
        raise Exception('HTTP Error Code %s for %s' % (conn.code, metric_path))

    header = Storage.header(StringIO.StringIO(content))
    metric_list = header['tag_list']
    metric_cnt = len(metric_list)
    metric_idx = metric_list.index(metric_name)
    step = metric_cnt + 1
    point_format = header['point_format']
    byte_order, point_type = point_format[0], point_format[1:]
    metric_content = []
    now = int(time.time())

    for archive in header['archive_list']:
        ts_min = now - archive['retention']
        archive_points = []
        series_format = byte_order + (point_type * archive['count'])
        packed_str = content[archive['offset']: archive['offset'] + archive['size']]
        unpacked_series = struct.unpack(series_format, packed_str)
        for i in xrange(0, len(unpacked_series), step):
            ts = unpacked_series[i]
            if ts > ts_min:
                # (timestamp, value)
                datapoint = (ts, unpacked_series[i+1+metric_idx])
                archive_points.append(datapoint)
        metric_content.append(archive_points)

    return metric_content
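
The loop above walks a flat series in which every point stores a timestamp followed by one value per metric, so the stride is metric_cnt + 1. The self-contained sketch below illustrates that layout with the struct module; the format string, counts, and values are invented for the illustration and are not taken from a real kenshin header.

import struct

# Hypothetical point layout: a timestamp followed by two float values,
# i.e. two metrics packed into one file (metric_cnt = 2, step = 3).
point_format = '!Lff'
byte_order, point_type = point_format[0], point_format[1:]
count = 2  # number of points in this toy archive

series_format = byte_order + (point_type * count)
packed_str = struct.pack(series_format,
                         1411628779, 1.0, 2.0,
                         1411628780, 1.5, 2.5)

step = 2 + 1  # metric_cnt + 1 (the timestamp column)
unpacked_series = struct.unpack(series_format, packed_str)
for i in range(0, len(unpacked_series), step):
    ts = unpacked_series[i]
    values = unpacked_series[i + 1:i + step]
    print(ts, values)  # timestamp and its per-metric values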
Example #5
def merge_metrics(meta, metric_paths, metric_names, output_file):
    ''' Merge metrics to a kenshin file.
    '''
    # Get content (data points grouped by archive) of each metric.
    metrics_archives_points = [
        get_metric_content(path, metric)
        for (path, metric) in zip(metric_paths, metric_names)
    ]

    # Merge metrics to a kenshin file
    with open(output_file, 'wb') as f:
        archives = meta['archive_list']
        archive_info = [(archive['sec_per_point'], archive['count'])
                        for archive in archives]
        inter_tag_list = metric_names + ['']  # append an empty tag to reserve space

        # header
        packed_kenshin_header = Storage.pack_header(
            inter_tag_list,
            archive_info,
            meta['x_files_factor'],
            Agg.get_agg_name(meta['agg_id']),
        )[0]
        f.write(packed_kenshin_header)

        for i, archive in enumerate(archives):
            archive_points = [x[i] for x in metrics_archives_points]
            merged_points = merge_points(archive_points)
            points = fill_gap(merged_points, archive, len(meta['tag_list']))
            packed_str = packed_kenshin_points(points)
            f.write(packed_str)
Example #6
def get_metric_content(metric_path, metric_name):
    ''' Return data points of each archive of the metric.
    '''
    conn = urllib.urlopen(metric_path)
    if conn.code == 200:
        content = conn.read()
    else:
        raise Exception('HTTP Error Code %s for %s' % (conn.code, metric_path))

    header = Storage.header(StringIO.StringIO(content))
    metric_list = header['tag_list']
    metric_cnt = len(metric_list)
    metric_idx = metric_list.index(metric_name)
    step = metric_cnt + 1
    point_format = header['point_format']
    byte_order, point_type = point_format[0], point_format[1:]
    metric_content = []
    now = int(time.time())

    for archive in header['archive_list']:
        ts_min = now - archive['retention']
        archive_points = []
        series_format = byte_order + (point_type * archive['count'])
        packed_str = content[archive['offset']:archive['offset'] +
                             archive['size']]
        unpacked_series = struct.unpack(series_format, packed_str)
        for i in xrange(0, len(unpacked_series), step):
            ts = unpacked_series[i]
            if ts > ts_min:
                # (timestamp, value)
                datapoint = (ts, unpacked_series[i + 1 + metric_idx])
                archive_points.append(datapoint)
        metric_content.append(archive_points)

    return metric_content
Example #7
def merge_files(meta, metrics, data_dir, output_file):
    contents = [get_whisper_file_content(data_dir, m) for m in metrics]
    mkdir_p(os.path.dirname(output_file))
    needed_metrics = meta['metrics_max_num'] - len(metrics)
    now = int(time.time())

    with open(output_file, 'wb') as f:
        archives = meta['archives']
        archive_info = [(archive['sec_per_point'], archive['count'])
                        for archive in archives]
        agg_name = get_agg_name(meta['agg_type'])
        inter_tag_list = metrics + [''] * (needed_metrics + 1)

        # header
        packed_kenshin_header = Storage.pack_header(inter_tag_list,
                                                    archive_info, meta['xff'],
                                                    agg_name)[0]
        f.write(packed_kenshin_header)

        # archives
        for archive in archives:
            whisper_points = [
                read_whisper_points(content, archive, now)
                for content in contents
            ]
            archive_points = merge_points(whisper_points, needed_metrics)
            archive_points = fill_gap(archive_points, archive,
                                      meta['metrics_max_num'])
            packed_str = packed_kenshin_points(archive_points)
            f.write(packed_str)
Example #8
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('-d',
                        '--src-data-dir',
                        required=True,
                        help="src data directory (local or http address).")
    parser.add_argument('-n',
                        '--src-instance-num',
                        required=True,
                        type=int,
                        help="src rurouni cache instance number.")
    parser.add_argument(
        '-m',
        '--kenshin-file',
        required=True,
        help='kenshin data files that we want to add the history.')
    parser.add_argument('-p',
                        '--processes',
                        default=10,
                        type=int,
                        help="number of processes.")
    args = parser.parse_args()

    # start processes
    processes = []
    queue = Queue()
    for w in xrange(args.processes):
        p = Process(target=worker, args=(queue, ))
        p.start()
        processes.append(p)

    # generate data
    with open(args.kenshin_file) as f:
        for line in f:
            kenshin_filepath = line.strip()
            if not kenshin_filepath:
                continue
            with open(kenshin_filepath) as kf:  # avoid shadowing the outer file handle
                header = Storage.header(kf)
            metrics = header['tag_list']
            metric_paths = [
                metric_to_filepath(args.src_data_dir, m, args.src_instance_num)
                for m in metrics
            ]
            item = (header, metric_paths, metrics, kenshin_filepath)
            queue.put(item)

    # stop processes
    for _ in xrange(args.processes):
        queue.put("STOP")
    for p in processes:
        p.join()
Example #9
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-d', '--src-data-dir', required=True,
        help="src data directory (local or http address)."
        )
    parser.add_argument(
        '-n', '--src-instance-num', required=True, type=int,
        help="src rurouni cache instance number."
        )
    parser.add_argument(
        '-m', '--kenshin-file', required=True,
        help='kenshin data files that we want to add the history.'
        )
    parser.add_argument(
        '-p', '--processes', default=10, type=int,
        help="number of processes."
        )
    args = parser.parse_args()

    # start processes
    processes = []
    queue = Queue()
    for w in xrange(args.processes):
        p = Process(target=worker, args=(queue,))
        p.start()
        processes.append(p)

    # generate data
    with open(args.kenshin_file) as f:
        for line in f:
            kenshin_filepath = line.strip()
            if not kenshin_filepath:
                continue
            with open(kenshin_filepath) as kf:  # avoid shadowing the outer file handle
                header = Storage.header(kf)
            metrics = header['tag_list']
            metric_paths = [
                metric_to_filepath(args.src_data_dir, m, args.src_instance_num)
                for m in metrics
            ]
            item = (header, metric_paths, metrics, kenshin_filepath)
            queue.put(item)

    # stop processes
    for _ in xrange(args.processes):
        queue.put("STOP")
    for p in processes:
        p.join()
Example #10
class TestStorageBase(unittest.TestCase):
    data_dir = '/tmp/kenshin'

    def setUp(self):
        if os.path.exists(self.data_dir):
            shutil.rmtree(self.data_dir)

        mkdir_p(self.data_dir)
        self.storage = Storage(data_dir=self.data_dir)
        self.basic_setup = self._basic_setup()
        self.storage.create(*self.basic_setup)

        metric_name = self.basic_setup[0]
        self.path = self.storage.gen_path(self.data_dir, metric_name)
        tag_list = self.basic_setup[1]
        self.null_point = (None,) * len(tag_list)

    def tearDown(self):
        shutil.rmtree(self.data_dir)

    @staticmethod
    def _gen_val(i, num=2):
        return [10 * j + i for j in range(num)]
Example #11
# coding: utf-8

from kenshin.storage import (Storage, KenshinException, InvalidConfig,
                             InvalidTime, RetentionParser)

__version__ = "0.3.1"
__commit__ = "9b67db3"
__author__ = "zzl0"
__email__ = "*****@*****.**"
__date__ = "Sun Dec 18 16:09:53 2016 +0800"

_storage = Storage()
validate_archive_list = _storage.validate_archive_list
create = _storage.create
update = _storage.update
fetch = _storage.fetch
header = _storage.header
pack_header = _storage.pack_header
add_tag = _storage.add_tag

parse_retention_def = RetentionParser.parse_retention_def
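
These module-level names simply re-export methods of a default Storage instance. The sketch below drives a Storage constructed with an explicit data_dir instead, using only call signatures that appear in the test examples on this page (create, gen_path, update, parse_retention_def); the directory, tags, retention string, and values are placeholders.

import os
import time

from kenshin.storage import Storage, RetentionParser

data_dir = '/tmp/kenshin-demo'  # placeholder directory
if not os.path.exists(data_dir):
    os.makedirs(data_dir)

metric_name = 'sys.cpu.user'
tag_list = ['host=webserver0,cpu=0', 'host=webserver1,cpu=1']
archive_list = [RetentionParser.parse_retention_def(x)
                for x in '1s:1h,60s:2d'.split(',')]

storage = Storage(data_dir=data_dir)
storage.create(metric_name, tag_list, archive_list, 20, 'min')

path = storage.gen_path(data_dir, metric_name)
now = int(time.time())
# one (timestamp, values) point per second, one value per tag
points = [(now - 10 + i, (i, 10 + i)) for i in range(10)]
storage.update(path, points, now)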
Example #12
class TestStorageIO(unittest.TestCase):
    data_dir = '/tmp/kenshin'

    def setUp(self):
        if os.path.exists(self.data_dir):
            shutil.rmtree(self.data_dir)

        mkdir_p(self.data_dir)
        self.storage = Storage(data_dir=self.data_dir)
        self.basic_setup = self._basic_setup()
        self.storage.create(*self.basic_setup)

        metric_name = self.basic_setup[0]
        self.path = self.storage.gen_path(self.data_dir, metric_name)

    def tearDown(self):
        shutil.rmtree(self.data_dir)

    def _basic_setup(self):
        metric_name = 'sys.cpu.user'
        self.file_cnt = 40

        tag_list = ['host=webserver%s,cpu=%s' % (i, i)
                    for i in range(self.file_cnt)]
        archive_list = "1s:1h,60s:2d,300s:7d,15m:25w,12h:5y".split(',')
        archive_list = [RetentionParser.parse_retention_def(x)
                        for x in archive_list]

        x_files_factor = 20
        agg_name = 'min'
        return [metric_name, tag_list, archive_list, x_files_factor, agg_name]

    def _gen_val(self, i):
        res = []
        for j in range(self.file_cnt):
            res.append(i + 10*j)
        return tuple(res)

    def test_io(self):
        """
        Test IO performance.

        (1000 io/s * 3600 s * 24) / (3*10**6 metrics / (40 metrics/file)) = 1152 io/file
        Since the header function is called several times during a single write, and the
        header data is small enough to be read from a cache, enable_debug ignores header
        reads.
        """
        enable_debug(ignore_header=True)

        now_ts = 1411628779
        ten_min = 10 * RetentionParser.TIME_UNIT['minutes']
        one_day = RetentionParser.TIME_UNIT['days']
        from_ts = now_ts - one_day

        for i in range(one_day / ten_min):
            points = [(from_ts + i * ten_min + j, self._gen_val(i * ten_min + j))
                      for j in range(ten_min)]
            self.storage.update(self.path, points, from_ts + (i+1) * ten_min)

        open_ = kenshin.storage.open
        io = open_.read_cnt + open_.write_cnt
        io_limit = 1152
        self.assertLessEqual(io, io_limit)
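
The IO budget quoted in the docstring works out as follows (plain arithmetic, nothing kenshin-specific): 1000 io/s sustained over a day is spread across 3 million metrics packed 40 to a file.

io_per_day = 1000 * 3600 * 24   # sustained 1000 io/s for one day
files = 3 * 10**6 // 40         # 3M metrics packed 40 metrics per file
print(io_per_day // files)      # -> 1152 io per file per day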
Example #13
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-t', '--src-type', required=True,
        choices=['whisper', 'kenshin'],
        help="src storage type"
        )
    parser.add_argument(
        '-d', '--src-data-dir', required=True,
        help="src data directory (http address)."
        )
    parser.add_argument(
        '-n', '--src-instance-num', type=int,
        help=('src rurouni cache instance number (required when src_type '
              'is kenshin)')
        )
    parser.add_argument(
        '-m', '--kenshin-file', required=True,
        help=('kenshin data files that we want to add the history, '
              'use kenshin-find-file.py to generate this file.')
        )
    parser.add_argument(
        '-p', '--processes', default=10, type=int,
        help="number of processes."
        )
    args = parser.parse_args()

    if args.src_type == 'kenshin' and args.src_instance_num is None:
        parser.error('src-instance-num is required')

    # start processes
    processes = []
    queue = Queue()
    for w in xrange(args.processes):
        p = Process(target=worker, args=(queue,))
        p.start()
        processes.append(p)

    # generate data
    with open(args.kenshin_file) as f:
        for line in f:
            kenshin_filepath = line.strip()
            if not kenshin_filepath:
                continue
            with open(kenshin_filepath) as kf:  # avoid shadowing the outer file handle
                header = Storage.header(kf)
            metrics = header['tag_list']
            if args.src_type == 'kenshin':
                metric_paths = [
                    metric_to_filepath(args.src_data_dir, m, args.src_instance_num)
                    for m in metrics
                ]
            else:  # whisper
                metric_paths = [
                    metric_to_whisper_filepath(args.src_data_dir, m)
                    for m in metrics
                ]
            item = (args.src_type, header, metric_paths, metrics, kenshin_filepath)
            queue.put(item)

    # stop processes
    for _ in xrange(args.processes):
        queue.put("STOP")
    for p in processes:
        p.join()