Example #1
from data_analysis.util.read_file_utils_updated import *
from config import *
from exec_utilities import exec_utils  # imports mirrored from Example #9 below


def parse_line(line: str, tag_lst: list):
    data_len = len(tag_lst)
    if line is None:
        time_lst = [None for _ in range(data_len)]
    else:
        lst = line.split()[get_log_header_col():]
        # eval every numeric span between a ':' and its trailing 's' unit
        my_str = ''.join(lst)
        colon_idx_lst = []
        unit_idx_lst2 = []
        for idx, ch in enumerate(my_str):
            if ch == ':':
                colon_idx_lst.append(idx)
            if ch == 's':
                # count 's' as a time unit only when preceded by a digit
                if idx >= 1 and my_str[idx - 1].isdigit():
                    unit_idx_lst2.append(idx)

        # assert len(colon_idx_lst) == len(unit_idx_lst2) == data_len
        if len(colon_idx_lst) == len(unit_idx_lst2) == data_len:
            time_lst = [
                eval(my_str[beg + 1:end])
                for beg, end in zip(colon_idx_lst, unit_idx_lst2)
            ]
        else:
            logger = exec_utils.get_logger('/home/yche/logs/' + 'error.log',
                                           __name__)
            logger.info('err:' + my_str)
            time_lst = [None for _ in range(data_len)]
    return dict(
        zip(tag_lst,
            map(lambda ele: None if ele is None else float(ele), time_lst)))
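
# Illustration (hypothetical log line; assumes get_log_header_col() == 4, i.e.
# four header tokens before the payload -- not the project's real log format):
#   >>> parse_line('2019-10-06 12:00:00 INFO foo read: 1.5s, compute: 2.25s',
#   ...            ['read', 'compute'])
#   {'read': 1.5, 'compute': 2.25}
#   >>> parse_line(None, ['read', 'compute'])
#   {'read': None, 'compute': None}
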
if __name__ == '__main__':
    base_dir = '/home/yche/'
    # base_dir = '/Users/cheyulin/'
    os.system('mkdir -p {}logs/'.format(base_dir))
    my_res_log_file_folder = config_lst[0]
    my_gpu_lst = config_lst[2]

    for hostname in my_gpu_lst:
        app_md_path = init_folder_md_json_file('..', hostname,
                                               user_output_md_file)
        for my_md_algorithm_name in config_lst[1]:
            json_file_path = my_res_log_file_folder + '-' + my_md_algorithm_name + '.json'
            json_file_path = os.sep.join(
                ['../data-json/', hostname, json_file_path])
            log_path = my_res_log_file_folder + '-' + my_md_algorithm_name + '.log'
            logger = exec_utils.get_logger(
                '{}logs/'.format(base_dir) + log_path, __name__)

            with open(app_md_path, 'a+') as output_md_file:
                # Dataset -> Thread Num -> Detailed Time Info
                config_dict = get_config_dict_via_hostname(hostname)
                root_dir = os.sep.join([
                    config_dict[exp_res_root_mount_path_tag],
                    my_res_log_file_folder,
                    hostname,
                ])
                dataset_lst = load_data_sets()
                reorder_tag = 'org'
                t_lst = list(map(str, config_dict[thread_num_lst_tag]))

                # Fetch data and parse it as a markdown file
                fetch_statistics(root_dir=root_dir,
                                 dataset_lst=dataset_lst,
                                 reorder_tag=reorder_tag,
                                 t_lst=t_lst,
                                 algorithm=my_md_algorithm_name,
                                 json_file_path=json_file_path)
Example #3
import json
import os

from data_analysis.figures_icde19.play.common import *


def parse_ept():
    mount_dir = '{}/workspace/yche/git-repos/OutOfCoreSCAN/python_experiments/exp_results/exp-2019-10-06-parameters' \
                '/ustgpu2/{}/org/40/{}/'. \
        format('/home/yche/mnt/ustgpu2', 'webgraph_eu', 'dstc-tp')
    data = dict()
    for ept in [1.25 * (10**8) * (2**i) for i in range(9)]:
        file_path = '{}/pkt-varying-{}.log'.format(mount_dir, ept)
        lines = get_file_lines(file_path)
        line = '\n'.join(lines)
        # grab the 'TC-levels: ..., Time: [...]' summary and eval it as a dict
        content = ret_all('TC-levels: .*, Time: .*]', line)[0]
        tc_dict = eval('{' + content.replace(
            'TC-levels', "'TC-levels'").replace('Time', "'Time'") + '}')
        time_dict = parse_lines(lines, local_tag_lst, total_time_tag,
                                total_tag_lst)
        # union(...) merges the parsed phase times into tc_dict in place
        union(tc_dict, time_dict)
        data[ept] = tc_dict
        print(tc_dict)
    with open('{}/ept.json'.format(varying_parameter_json_dir), 'w') as ofs:
        ofs.write(json.dumps(data, indent=4))
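
# The eval trick in parse_ept just quotes the two keys so the summary line
# becomes a Python dict literal; e.g. with a made-up content string:
#   content = "TC-levels: [3, 5], Time: [0.12, 0.34]"
#   eval('{' + content.replace('TC-levels', "'TC-levels'")
#                     .replace('Time', "'Time'") + '}')
#   -> {'TC-levels': [3, 5], 'Time': [0.12, 0.34]}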


if __name__ == '__main__':
    base_dir = '/home/yche/'
    os.system('mkdir -p {}logs/'.format(base_dir))
    logger = get_logger('/home/yche/tmp.log', __name__)

    parse_ept()
Example #4
import os

from exec_utilities.exec_utils import get_logger

if __name__ == '__main__':
    files = [
        # 'run_k_truss_reorderd_graph_PP_SI.py',
        # 'run_k_truss_reorderd_graph_nvprof.py',
        # 'run_k_truss_reorderd_graph_cuda.py',
        # 'run_k_truss_reorderd_graph.py',
        'run_k_truss_performance.py',
        'run_necleus_decomposition.py',
    ]

    work_dir = '/home/zlai/workspace/yche/git-repos/' \
               'OutOfCoreSCAN/python_experiments/exp_results'
    os.system('mkdir -p {}'.format(work_dir))
    hostname = 'ustgpu2'
    logger = get_logger(os.sep.join([work_dir, hostname + '.log']),
                        name=__name__)

    # run the whole file list three times
    for _ in range(3):
        for f in files:
            cmd = 'python /home/zlai/workspace/yche/git-repos/' \
                  'OutOfCoreSCAN/python_experiments/run_experiments/{}'.format(f)
            logger.info(cmd)
            os.system(cmd)
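
A hedged aside: the loop above ignores the return value of os.system, so a
failed run goes unnoticed. A subprocess-based variant (standard library, not
what this script uses) that logs non-zero exits would be:

import subprocess

ret = subprocess.run(cmd, shell=True)
if ret.returncode != 0:
    logger.info('non-zero exit {} for: {}'.format(ret.returncode, cmd))
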
Example #5
import os

from exec_utilities import exec_utils, time_out_util  # time_out_util path assumed

if __name__ == '__main__':
    data_set_lst = [
        # "snap_livejournal",
        # "snap_orkut",
        # "webgraph_eu",
        # "webgraph_uk",
        # "webgraph_webbase",
        # "webgraph_it",
        # "webgraph_twitter",
        # "snap_friendster",
        # "rmat_v50m_e0.5g",
        # "rmat_v5m_e0.5g",
        # "rmat_v0.5m_e0.5g",
        "s22-16",
        "s23-16",
        "s24-16",
        "s25-16",
        "s26-16",
        "s27-16",
        "s28-16",
        "s29-16"
    ]
    # folder_root = '/home/yche/mnt/luocpu9/mnt/storage1/yche/datasets'
    folder_root = '/home/yche/mnt/gpu24/mnt/nvme-ssd/yche/datasets'
    logger = exec_utils.get_logger("/home/yche/log.log", name=__name__)
    for data_set in data_set_lst:
        cmd = 'md5sum ' + os.sep.join([folder_root, data_set, 'ktruss-pkt-inter-shrink.histogram'])
        logger.info(time_out_util.run_with_timeout(cmd, timeout_sec=10))
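
time_out_util.run_with_timeout is a project helper; a minimal sketch of what
such a helper could look like (an assumption, not the project's code):

import subprocess


def run_with_timeout(cmd, timeout_sec):
    # run the shell command, give up after timeout_sec seconds
    try:
        ret = subprocess.run(cmd, shell=True, capture_output=True, text=True,
                             timeout=timeout_sec)
        return ret.stdout
    except subprocess.TimeoutExpired:
        return 'timeout after {}s: {}'.format(timeout_sec, cmd)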

Example #6
from data_analysis.util.read_file_utils_updated import *
from config import *
from exec_utilities import exec_utils

if __name__ == '__main__':
    os.system('mkdir -p /home/yche/logs/')
    my_res_log_file_folder = config_lst[0]
    my_gpu_lst = config_lst[2]
    dataset_lst = load_data_sets()
    t_num = '60'
    for hostname in my_gpu_lst:
        app_md_path = init_folder_md_json_file('..', hostname, user_output_md_file)
        app_md_simple_path = init_folder_md_json_file('..', hostname, user_output_md_file_simple)
        for my_md_algorithm_name in config_lst[1]:
            json_file_path = my_res_log_file_folder + '-' + my_md_algorithm_name + '.json'
            json_file_path = os.sep.join(['../data-json/', hostname, json_file_path])
            log_path = my_res_log_file_folder + '-' + my_md_algorithm_name + '.log'
            logger = exec_utils.get_logger('/home/yche/logs/' + log_path, __name__)

            with open(app_md_path, 'a+') as output_md_file:
                with open(app_md_simple_path, 'a+') as output_md_file_simple:
                    # Dataset -> Thread Num -> Detailed Time Info
                    config_dict = get_config_dict_via_hostname(hostname)
                    root_dir = os.sep.join(
                        [config_dict[exp_res_root_mount_path_tag], my_res_log_file_folder, hostname, ])
                    # dataset_lst = config_dict[data_set_lst_tag]
                    reorder_tag = 'org'
                    t_lst = list(map(str, config_dict[thread_num_lst_tag]))

                    # Fetch data and parse it as a markdown file
                    fetch_statistics(root_dir=root_dir, dataset_lst=dataset_lst, reorder_tag=reorder_tag, t_lst=t_lst,
                                     algorithm=my_md_algorithm_name, json_file_path=json_file_path)
Example #7
        return eval(''.join(ifs.readlines()))


# data set abbreviation dictionary
data_names = get_name_dict()

# figure parameters
FIG_SIZE_MULTIPLE = (32, 6)
LABEL_SIZE = 22
TICK_SIZE = 22
LEGEND_SIZE = 22

# get the data for figures
index_info_dict = get_index_dict_with_reads('../../data_analysis')
logger = get_logger(
    '{}/tkde_indexing_time_drawing_log.log'.format(yche_logger_root_dir),
    __name__)
logger.info(index_info_dict)
digg_data_tag = 'digg-friends'
flickr_data_tag = 'flickr-growth'


def get_tsf_index_disk_size(v_num):
    # TSF index size in MB: 100 sampled one-way graphs, one int per vertex
    sample_one_way_graph_num = 100
    return float(
        format_str(sample_one_way_graph_num * size_of_int * v_num /
                   (1024.**2)))
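
# Sanity check (assuming size_of_int == 4 bytes): for v_num = 10**6,
# 100 * 4 * 10**6 / 1024.**2 ~= 381.47 MB per TSF index.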


other_indexing_dict = {
    local_push_tag: {
Example #8
def fetch_statistics(root_dir, dataset_lst, reorder_tag, t_lst, algorithm,
                     json_file_path):
    my_dict = dict()
    for dataset in dataset_lst:
        my_dict[dataset] = dict()
        for t_num in t_lst:
            file_path = os.sep.join(
                [root_dir, dataset, reorder_tag, t_num, algorithm + '.log'])
            logger.info(file_path)
            lines = get_file_lines(file_path)
            eid_time_lst = get_tc_time_lst('\n'.join(lines))
            my_dict[dataset][t_num] = min(eid_time_lst)
    with open(json_file_path, 'w') as ofs:
        ofs.write(json.dumps(my_dict, indent=4))


if __name__ == '__main__':
    base_dir = '/home/yche/'
    os.system('mkdir -p {}'.format(si_dir))

    logger = exec_utils.get_logger(
        '{}logs/'.format(base_dir) + 'ustgpu2-si.log', __name__)
    root_dir_template = '{}mnt/ustgpu2/workspace/yche/git-repos/' \
                        'OutOfCoreSCAN/python_experiments/exp_results/{}/ustgpu2'
    data_set_lst = ['webgraph_eu', 'webgraph_it', 'webgraph_twitter']

    dtc_wp_root_dir = 'exp-2019-10-05-eid'
    baseline_root_dir = 'exp-2019-10-04-eid'
    for name in ['pkt-eval-tc-wp']:
        fetch_statistics(root_dir_template.format(base_dir, dtc_wp_root_dir),
                         data_set_lst, 'org',
                         [str(i) for i in [1, 2, 4, 8, 16, 32, 40]], name,
                         '{}/{}'.format(si_dir, name + '.json'))
    for name in ['pkt-eval-tc-dtc']:
        fetch_statistics(root_dir_template.format(base_dir, dtc_wp_root_dir),
                         data_set_lst, 'org', [str(i) for i in [40]], name,
                         '{}/{}'.format(si_dir, name + '.json'))
Example #9
from data_analysis.util.read_file_utils_updated import *
from config import *
from exec_utilities import exec_utils

iter_tag = 'iter'
time_tag = 'time'
left_tag = 'left'
total_tag = 'total'

if __name__ == '__main__':
    logger = exec_utils.get_logger('/home/yche/analyzing_peel.log', __name__)

    config_dict = get_config_dict(gpu23_tag, '../..')

    root_dir = os.sep.join(
        [config_dict[exp_res_root_mount_path_tag], 'exp-2019-06-24-k-truss-refactor', gpu23_tag, ])

    dataset_lst = config_dict[data_set_lst_tag]


    def parse_line(line):
        lst = line.split()[get_log_header_col():]
        triple = [literal.replace('s', '').replace(',', '') for literal in [lst[-6], lst[-3], lst[-1]]]
        left = triple[2].split('/')[0]
        right = triple[2].split('/')[1]
        return dict(
            zip([iter_tag, time_tag, left_tag, total_tag],
                [int(triple[0]), float(triple[1]), int(left), int(right)], ))
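
    # Illustration (hypothetical log tail; the token layout is assumed from
    # the slicing above): '... iter: 3, takes time: 0.5s, left: 10/20'
    #   -> {'iter': 3, 'time': 0.5, 'left': 10, 'total': 20}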


    for dataset in dataset_lst:
Example #10
def fetch_statistics(root_dir, dataset_lst, reorder_tag, t_lst, algorithm,
                     json_file_path):
    my_dict = dict()
    for dataset in dataset_lst:
        my_dict[dataset] = dict()
        for t_num in t_lst:
            file_path = os.sep.join(
                [root_dir, dataset, reorder_tag, t_num, algorithm + '.log'])
            logger.info(file_path)
            lines = get_file_lines(file_path)
            functor = max if algorithm == 'pkt-eid' else min
            eid_time_lst = get_eid_time_lst('\n'.join(lines))
            my_dict[dataset][t_num] = functor(eid_time_lst)
    with open(json_file_path, 'w') as ofs:
        ofs.write(json.dumps(my_dict, indent=4))


if __name__ == '__main__':
    base_dir = '/home/yche/'
    os.system('mkdir -p {}'.format(gpu23_pp_dir))

    logger = exec_utils.get_logger('{}logs/'.format(base_dir) + 'gpu23-pp.log',
                                   __name__)

    my_res_log_file_folder = 'exp-2019-10-07-eid'
    root_dir = '{}mnt/luocpu9/mnt/storage1/yche/git-repos/' \
               'OutOfCoreSCAN/python_experiments/exp_results/{}/gpu23'.format(base_dir, my_res_log_file_folder)
    for name in ['pkt-eid', 'pkt-eid-parallel']:
        fetch_statistics(root_dir,
                         ['webgraph_eu', 'webgraph_it', 'webgraph_twitter'],
                         'org', [str(i) for i in [1, 2, 4, 8, 16, 32, 40]],
                         name, '{}/{}'.format(gpu23_pp_dir, name + '.json'))