Example #1
def delete_file(storage_dir, index, pos_metrics):
    """
    Note: We do not delete the data file itself; we only clear the tags
    in the data file, so the space can be reused by a new metric.
    """
    bucket, schema_name, fid = index
    bucket_data_dir = os.path.join(storage_dir, 'data', bucket)
    filepath = getFilePathByInstanceDir(bucket_data_dir, schema_name, fid)

    with open(filepath, "r+b") as fh:
        header_info = header(fh)
        tag_list = header_info["tag_list"]
        reserved_size = header_info["reserved_size"]
        archive_list = [(a["sec_per_point"], a["count"])
                        for a in header_info["archive_list"]]
        agg_name = Agg.get_agg_name(header_info["agg_id"])

        released_size = 0
        for pos_idx, tag in pos_metrics:
            if tag == tag_list[pos_idx]:
                tag_list[pos_idx] = ""
                released_size += len(tag)
            elif tag_list[pos_idx] != "":
                print >> sys.stderr, "tag not match: (%s, %d)" % (tag, pos_idx)

        if released_size != 0:
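            # Fold the freed tag bytes back into the reserved area; the "N"
            # padding keeps the packed header at its original size.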
            inter_tag_list = tag_list + ["N" * (reserved_size + released_size)]
            packed_header, _ = pack_header(inter_tag_list, archive_list,
                                           header_info["x_files_factor"],
                                           agg_name)
            fh.write(packed_header)
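Here pos_metrics is a list of (position, tag) pairs, as the loop above implies. A hypothetical invocation (the storage path, bucket, file id, and metric name below are made up for illustration):

# Hypothetical call; index unpacks as (bucket, schema_name, fid).
delete_file(storage_dir='/kenshin/storage',
            index=('a', 'default', '12'),
            pos_metrics=[(3, 'host.cpu.idle')])  # clear slot 3 if its tag matches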
Example #2
def main():
    if len(sys.argv) < 3:
        print('Need data_dir and link_dir.\n'
              'e.g.: kenshin-rebuild-link.py /kenshin/data/a /kenshin/link/a')
        sys.exit(1)

    data_dir, link_dir = sys.argv[1:3]
    data_dir = os.path.abspath(data_dir)
    link_dir = os.path.abspath(link_dir)

    for schema_name in os.listdir(data_dir):
        hs_file_pat = os.path.join(data_dir, schema_name, '*.hs')
        for fp in glob.glob(hs_file_pat):
            with open(fp) as f:
                header = kenshin.header(f)
                metric_list = header['tag_list']
                for metric in metric_list:
                    if metric != '':
                        try:
                            create_link(metric, link_dir, fp)
                        except OSError as exc:
                            if exc.errno == errno.ENAMETOOLONG:
                                pass
                            else:
                                raise
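create_link is defined elsewhere in this script and is not shown here. A minimal sketch of what it plausibly does, assuming it maps the dotted metric name to a symlink pointing at the data file (an assumption, not the project's actual implementation; deep metric names also explain the ENAMETOOLONG handling above):

import os

# Hypothetical sketch: turn 'a.b.c' into <link_dir>/a/b/c.hs and symlink
# it to the data file fp.
def create_link(metric, link_dir, fp):
    link_path = os.path.join(link_dir, metric.replace('.', os.sep) + '.hs')
    dir_name = os.path.dirname(link_path)
    if not os.path.isdir(dir_name):
        os.makedirs(dir_name)
    if os.path.lexists(link_path):
        os.remove(link_path)
    os.symlink(fp, link_path)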
Example #3
def rebuildLink(instance_data_dir, instance_link_dir):
    for schema_name in os.listdir(instance_data_dir):
        hs_file_pat = os.path.join(instance_data_dir, schema_name, '*.hs')
        for fp in glob.glob(hs_file_pat):
            with open(fp) as f:
                header = kenshin.header(f)
                metric_list = header['tag_list']
                for metric in metric_list:
                    if metric != '':
                        link_path = getMetricPathByInstanceDir(instance_link_dir, metric)
                        _createLinkHelper(link_path, fp)
Example #4
File: storage.py  Project: douban/Kenshin
def rebuildLink(instance_data_dir, instance_link_dir):
    for schema_name in os.listdir(instance_data_dir):
        hs_file_pat = os.path.join(instance_data_dir, schema_name, '*.hs')
        for fp in glob.glob(hs_file_pat):
            with open(fp) as f:
                header = kenshin.header(f)
                metric_list = header['tag_list']
                for metric in metric_list:
                    if metric != '':
                        link_path = getMetricPathByInstanceDir(instance_link_dir, metric)
                        try:
                            _createLinkHelper(link_path, fp)
                        except OSError as exc:
                            if exc.errno == errno.ENAMETOOLONG:
                                pass
                            else:
                                raise
Example #5
def resize_data_file(schema, data_file):
    print data_file
    with open(data_file) as f:
        header = kenshin.header(f)
    retentions = schema.archives
    old_retentions = [(x['sec_per_point'], x['count'])
                      for x in header['archive_list']]
    msg = []
    action = NO_OPERATION

    # x files factor
    if schema.xFilesFactor != header['x_files_factor']:
        action = CHANGE_META
        msg.append("x_files_factor: %f -> %f" %
                   (header['x_files_factor'], schema.xFilesFactor))

    # agg method
    old_agg_name = Agg.get_agg_name(header['agg_id'])
    if schema.aggregationMethod != old_agg_name:
        action = CHANGE_META
        msg.append("agg_name: %s -> %s" %
                   (old_agg_name, schema.aggregationMethod))

    # retentions
    if retentions != old_retentions:
        action = REBUILD
        msg.append("retentions: %s -> %s" % (old_retentions, retentions))

    if action == NO_OPERATION:
        print "No operation needed."
        return

    elif action == CHANGE_META:
        print 'Change Meta.'
        print '\n'.join(msg)
        change_meta(data_file, schema, header['max_retention'])
        return

    elif action == REBUILD:
        print 'Rebuild File.'
        print '\n'.join(msg)
        rebuild(data_file, schema, header, retentions)

    else:
        raise ValueError(action)
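NO_OPERATION, CHANGE_META, and REBUILD are module-level constants that the snippet does not show; the logic only requires three distinct values, e.g.:

# Assumed flag values; any three distinct constants work.
NO_OPERATION = 0
CHANGE_META = 1
REBUILD = 2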
Example #6
def rebuildIndex(instance_data_dir, instance_index_file):
    """
    Rebuild the index file from the data files; if a data file contains
    no valid metric, we remove it.
    """
    out = open(instance_index_file, 'w')
    for schema_name in os.listdir(instance_data_dir):
        hs_file_pat = os.path.join(instance_data_dir, schema_name, '*.hs')
        for fp in glob.glob(hs_file_pat):
            with open(fp) as f:
                empty_flag = True
                header = kenshin.header(f)
                metric_list = header['tag_list']
                file_id = splitext(basename(fp))[0]
                for i, metric in enumerate(metric_list):
                    if metric != '':
                        empty_flag = False
                        out.write('%s %s %s %s\n' %
                                  (metric, schema_name, file_id, i))
            if empty_flag:
                os.remove(fp)
    out.close()
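Each index line holds "metric schema_name file_id pos_idx", so the file can be read back with a few lines of stdlib code. A hedged sketch (load_index is illustrative, not part of the project):

# Hypothetical reader for the index format written above.
def load_index(instance_index_file):
    index = {}
    with open(instance_index_file) as f:
        for line in f:
            metric, schema_name, file_id, pos_idx = line.split()
            index[metric] = (schema_name, file_id, int(pos_idx))
    return index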
Example #7
def run(filepath, archive_idx, point_idx, error):
    with open(filepath) as f:
        header = kenshin.header(f)
        archive = header['archive_list'][archive_idx]
        point_size = header['point_size']
        point_format = header['point_format']

        start_offset = archive['offset'] + point_idx * point_size
        if point_idx < 0:
            start_offset += archive['size']

        point = get_point(f, start_offset, point_size, point_format)
        print 'count: %s' % archive['count']

        if not error:
            metric = get_metric(filepath)
            date_str = timestamp_to_datestr(point[0])
            if metric:
                idx = header['tag_list'].index(metric)
                return (point[0], point[idx + 1]), date_str

            else:
                return point, date_str
        else:
            sec_per_point = archive['sec_per_point']
            ts = point[0]
            start_offset += point_size
            point_idx += 1
            while start_offset < archive['size'] + archive['offset']:
                point = get_point(f, start_offset, point_size, point_format)
                if point[0] != ts + sec_per_point:
                    return point_idx
                start_offset += point_size
                point_idx += 1
                ts = point[0]
            return 'No error!'
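get_point is not shown in this snippet. Given that the header carries point_size and a struct-style point_format, a plausible sketch (an assumption, not the project's code):

import struct

# Hypothetical sketch: seek to the point's offset and unpack one record
# using the struct format stored in the file header.
def get_point(f, offset, point_size, point_format):
    f.seek(offset)
    return struct.unpack(point_format, f.read(point_size))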
Example #8
def getIntervals(self):
    with open(self.fs_path) as f:
        start = time.time() - kenshin.header(f)['max_retention']
        end = max(os.stat(self.fs_path).st_mtime, start)
        return [(start, end)]
Example #9
def get_intervals(self):
    with open(self.fs_path) as f:
        start = time.time() - kenshin.header(f)['max_retention']
    end = max(os.stat(self.fs_path).st_mtime, start)
    return IntervalSet([Interval(start, end)])
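Interval and IntervalSet here presumably come from graphite-web's graphite.intervals module, which lets the Graphite finder API intersect time ranges; the import would look like:

# graphite-web types used above (assumed import path).
from graphite.intervals import Interval, IntervalSet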
Example #10
#!/usr/bin/env python
# coding: utf-8


import sys
from pprint import pprint

import kenshin


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print 'Usage: kenshin-info.py <file_path>'
        sys.exit(1)
    path = sys.argv[1]
    with open(path) as f:
        pprint(kenshin.header(f))
Example #11
def resize_data_file(schema, data_file):
    print data_file
    rebuild = False
    with open(data_file) as f:
        header = kenshin.header(f)
    retentions = schema.archives
    old_retentions = [(x['sec_per_point'], x['count'])
                      for x in header['archive_list']]
    msg = ""
    if retentions != old_retentions:
        rebuild = True
        msg += "retentions:\n%s -> %s" % (old_retentions, retentions)

    if not rebuild:
        print "No operation needed."
        return

    print msg
    now = int(time.time())
    tmpfile = data_file + '.tmp'
    if os.path.exists(tmpfile):
        print "Removing previous temporary database file: %s" % tmpfile
        os.unlink(tmpfile)

    print "Creating new kenshin database: %s" % tmpfile
    kenshin.create(tmpfile,
                   [''] * len(header['tag_list']),
                   schema.archives,
                   header['x_files_factor'],
                   Agg.get_agg_name(header['agg_id']))
    for i, t in enumerate(header['tag_list']):
        kenshin.add_tag(t, tmpfile, i)

    size = os.stat(tmpfile).st_size
    old_size = os.stat(data_file).st_size

    print "Created: %s (%d bytes, was %d bytes)" % (
        tmpfile, size, old_size)

    print "Migrating data to new kenshin database ..."
    for archive in header['archive_list']:
        from_time = now - archive['retention'] + archive['sec_per_point']
        until_time = now
        _, timeinfo, values = kenshin.fetch(data_file, from_time, until_time)
        datapoints = zip(range(*timeinfo), values)
        datapoints = [[p[0], list(p[1])] for p in datapoints if p[1]]
        for _, values in datapoints:
            for i, v in enumerate(values):
                if v is None:
                    values[i] = NULL_VALUE
        kenshin.update(tmpfile, datapoints)
    backup = data_file + ".bak"

    print 'Renaming old database to: %s' % backup
    os.rename(data_file, backup)

    print "Renaming new database to: %s" % data_file
    try:
        os.rename(tmpfile, data_file)
    except Exception:
        print "Operation failed, restoring backup"
        os.rename(backup, data_file)
        raise
Example #12
def resize_metric(metric, schema, data_dirs):
    rebuild = False
    msg = ""

    path = get_metric_path(metric, data_dirs)
    print path
    with open(path) as f:
        header = kenshin.header(f)
    retentions = schema.archives
    old_retentions = [(r['sec_per_point'], r['count'])
                      for r in header['archive_list']]

    if retentions != old_retentions:
        rebuild = True
        msg += "retentions:\n%s -> %s" % (old_retentions, retentions)

    if not rebuild:
        print 'No Operation Needed.'
    else:
        print msg
        now = int(time.time())

        tmpfile = path + '.tmp'
        if os.path.exists(tmpfile):
            print 'Removing previous temporary database file: %s' % tmpfile
            os.unlink(tmpfile)

        print 'Creating new kenshin database: %s' % tmpfile
        kenshin.create(tmpfile, [''] * len(header['tag_list']),
                       schema.archives, header['x_files_factor'],
                       Agg.get_agg_name(header['agg_id']))
        for i, t in enumerate(header['tag_list']):
            kenshin.add_tag(t, tmpfile, i)

        size = os.stat(tmpfile).st_size
        old_size = os.stat(path).st_size
        print 'Created: %s (%d bytes, was %d bytes)' % (tmpfile, size,
                                                        old_size)

        print 'Migrating data to new kenshin database ...'
        for archive in header['archive_list']:
            from_time = now - archive['retention'] + archive['sec_per_point']
            until_time = now
            _, timeinfo, values = kenshin.fetch(path, from_time, until_time)
            datapoints = zip(range(*timeinfo), values)
            datapoints = [[p[0], list(p[1])] for p in datapoints if p[1]]
            for ts, values in datapoints:
                for i, v in enumerate(values):
                    if v is None:
                        values[i] = NULL_VALUE
            kenshin.update(tmpfile, datapoints)

        backup = path + '.bak'
        print 'Renaming old database to: %s' % backup
        os.rename(path, backup)

        print 'Renaming new database to: %s' % path
        try:
            os.rename(tmpfile, path)
        except Exception:
            print 'Operation failed, restoring backup'
            os.rename(backup, path)
            raise