# Imports are not shown in the original listing; these follow the layout of
# the JetStream example scripts and are an assumption. jsapi is the
# query-graph builder; the coral_* field tables, parse_ts, numbered, and the
# define_*_cube helpers come from the shared CoralCDN utility module.
import sys

import query_graph as jsapi
from coral_util import *


def get_graph(node, options):
  g = jsapi.QueryGraph()

  # we don't use this here
  # start_ts = parse_ts(options.start_ts)

  # coral_fidxs['Referrer_URL'],
  parsed_field_offsets = [coral_fidxs['timestamp'], coral_fidxs['HTTP_stat'],
                          coral_fidxs['URL_requested'], coral_fidxs['nbytes'],
                          coral_fidxs['dl_utime'], len(coral_types)]

  f = jsapi.FileRead(g, options.fname, skip_empty=True)
  csvp = jsapi.CSVParse(g, coral_types)
  csvp.set_cfg("discard_off_size", "true")
  round = jsapi.TimeWarp(g, field=1, warp=options.warp_factor)
  round.set_cfg("wait_for_catch_up", "false")
  f.instantiate_on(node)

  local_raw_cube = define_raw_cube(g, options.cube_name, node,
                                   parsed_field_offsets, True)
  if not options.full_url:
    url_to_dom = jsapi.URLToDomain(g, field=coral_fidxs['URL_requested'])
    g.chain([f, csvp, round, url_to_dom, local_raw_cube])
  else:
    g.chain([f, csvp, round, local_raw_cube])
  return g

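# Load pipeline built above: FileRead -> CSVParse -> TimeWarp
# [-> URLToDomain] -> raw cube on 'node'. No subscriber is attached here;
# the src_to_* helpers below take a data_src operator (e.g., a subscriber
# pulling from that cube) as the head of their chains.
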
def src_to_badreferrer(g, data_src, node, options):
  # keep only the 404 responses, project down to the fields of interest,
  # and reduce the remaining referrer URL to its domain
  only404s = jsapi.EqualsFilter(g, field=1, targ=404)
  return g.chain([data_src, only404s, jsapi.Project(g, 5), jsapi.Project(g, 4),
                  jsapi.Project(g, 2), jsapi.Project(g, 1),
                  jsapi.URLToDomain(g, 1)])

def src_to_bad_doms(g, data_src, node, options):
  return g.chain([data_src, jsapi.Project(g, 5), jsapi.Project(g, 4),
                  jsapi.Project(g, 3), jsapi.URLToDomain(g, 2)])

def src_to_domain(g, data_src, node, options):
  url2dom = jsapi.URLToDomain(g, 2)
  g.chain([data_src, jsapi.Project(g, 3), url2dom])
  return url2dom

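# --- Sketch: using the src_to_* helpers ------------------------------------
# A minimal sketch of how one of the helpers above composes with the raw cube
# that get_graph() loads. Assumptions: the cube named options.cube_name
# already exists on 'node', and the final define_raw_cube flag is the
# overwrite switch (False here so existing data is kept). The subscriber
# settings mirror those used in the get_graph variants below.
def badreferrer_graph(node, options):
  g = jsapi.QueryGraph()
  parsed_field_offsets = [coral_fidxs['timestamp'], coral_fidxs['HTTP_stat'],
                          coral_fidxs['URL_requested'], coral_fidxs['nbytes'],
                          coral_fidxs['dl_utime'], len(coral_types)]
  raw_cube = define_raw_cube(g, options.cube_name, node,
                             parsed_field_offsets, False)
  pull = jsapi.TimeSubscriber(g, {}, 1000)  # poll the cube once a second
  pull.set_cfg("ts_field", 0)
  pull.set_cfg("start_ts", parse_ts(options.start_ts))
  pull.instantiate_on(node)
  g.chain([raw_cube, pull])
  src_to_badreferrer(g, pull, node, options)  # pull feeds the 404 filter
  return g
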
def get_graph(source_nodes, root_node, options):
  g = jsapi.QueryGraph()

  ANALYZE = not options.load_only
  LOADING = not options.analyze_only
  ECHO_RESULTS = not options.no_echo
  MULTIROUND = options.multiround
  HASH_SAMPLE = options.hash_sample
  LOCAL_THRESH = options.local_thresh

  if not LOADING and not ANALYZE:
    print "can't disable both loading and analysis"
    sys.exit(0)

  start_ts = parse_ts(options.start_ts)

  central_cube = g.add_cube("global_coral_urls")
  central_cube.instantiate_on(root_node)
  define_cube(central_cube)

  if ECHO_RESULTS:
    pull_q = jsapi.TimeSubscriber(g, {}, 5000, sort_order="-count",
                                  num_results=10)
    pull_q.set_cfg("ts_field", 0)
    pull_q.set_cfg("start_ts", start_ts)
    pull_q.set_cfg("rollup_levels", "6,0,1")  # every five seconds to match subscription; roll up counts
    pull_q.set_cfg("simulation_rate", 1)
    pull_q.set_cfg("window_offset", 6 * 1000)  # but trailing by a few seconds

    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)
    g.chain([central_cube, pull_q, echo])

  add_latency_measure(g, central_cube, root_node, tti=4, hti=5,
                      latencylog=options.latencylog)

  congest_logger = jsapi.AvgCongestLogger(g)
  congest_logger.instantiate_on(root_node)
  congest_logger.set_cfg("field", 3)

  if MULTIROUND:
    tput_merge = jsapi.MultiRoundCoord(g)
    tput_merge.set_cfg("start_ts", start_ts)
    tput_merge.set_cfg("window_offset", 5 * 1000)
    tput_merge.set_cfg("ts_field", 0)
    tput_merge.set_cfg("num_results", 10)
    tput_merge.set_cfg("sort_column", "-count")
    tput_merge.set_cfg("min_window_size", 5)
    tput_merge.set_cfg("rollup_levels", "10,0,1")  # roll up response codes
    tput_merge.instantiate_on(root_node)
    if ECHO_RESULTS:  # pull_q exists only when echoing results
      pull_q.set_cfg("window_offset", 10 * 1000)  # but trailing by a few seconds
    g.connect(tput_merge, congest_logger)

  parsed_field_offsets = [coral_fidxs['timestamp'], coral_fidxs['HTTP_stat'],
                          coral_fidxs['URL_requested'], len(coral_types)]

  for node, i in numbered(source_nodes, not LOADING):
    if not options.full_url:
      table_prefix = "local_coral_domains"
    else:
      table_prefix = "local_coral_urls"
    table_prefix += "_" + options.warp_factor
    local_cube = g.add_cube(table_prefix + ("_%d" % i))
    define_cube(local_cube, parsed_field_offsets)
    print "cube output dimensions:", local_cube.get_output_dimensions()

    if LOADING:
      f = jsapi.FileRead(g, options.fname, skip_empty=True)
      csvp = jsapi.CSVParse(g, coral_types)
      csvp.set_cfg("discard_off_size", "true")
      round = jsapi.TimeWarp(g, field=1, warp=options.warp_factor)
      if not options.full_url:
        url_to_dom = jsapi.URLToDomain(g, field=coral_fidxs['URL_requested'])
        g.chain([f, csvp, round, url_to_dom, local_cube])
      else:
        g.chain([f, csvp, round, local_cube])
      f.instantiate_on(node)
    else:
      local_cube.set_overwrite(False)

    if MULTIROUND:
      pull_from_local = jsapi.MultiRoundClient(g)
    else:
      query_rate = 1000 if ANALYZE else 3600 * 1000
      pull_from_local = jsapi.VariableCoarseningSubscriber(g, {}, query_rate)
      pull_from_local.set_cfg("simulation_rate", 1)
      pull_from_local.set_cfg("max_window_size", options.max_rollup)
      pull_from_local.set_cfg("ts_field", 0)
      pull_from_local.set_cfg("start_ts", start_ts)
      pull_from_local.set_cfg("window_offset", 2000)  # but trailing by a few seconds
      pull_from_local.set_cfg("sort_order", "-count")
    pull_from_local.instantiate_on(node)

    local_cube.instantiate_on(node)
    # count_logger = jsapi.CountLogger(g, field=3)

    timestamp_op = jsapi.TimestampOperator(g, "ms")
    hostname_extend_op = jsapi.ExtendOperator(g, "s", ["${HOSTNAME}"])  # dummy hostname for the latency tracker
    hostname_extend_op.instantiate_on(node)

    lastOp = g.chain([local_cube, pull_from_local])
    if HASH_SAMPLE:
      v = jsapi.VariableSampling(g, field=2, type='S')
      v.set_cfg("steps", options.steps)
      # print "connecting ",
      lastOp = g.connect(lastOp, v)
      g.add_policy([pull_from_local, v])
    elif LOCAL_THRESH:
      v = jsapi.WindowLenFilter(g)
      v.set_cfg("err_field", 3)
      # print "connecting ",
      lastOp = g.connect(lastOp, v)
      g.add_policy([pull_from_local, v])
    g.chain([lastOp, timestamp_op, hostname_extend_op])
    # output: 0=>time, 1=>response_code, 2=>url, 3=>count,
    #         4=>timestamp at source, 5=>hostname

    if MULTIROUND:
      g.connect(hostname_extend_op, tput_merge)
    else:
      g.connect(hostname_extend_op, congest_logger)

  timestamp_cube_op = jsapi.TimestampOperator(g, "ms")
  timestamp_cube_op.instantiate_on(root_node)

  g.chain([congest_logger, timestamp_cube_op, central_cube])
  # input to central cube: 0=>time, 1=>response_code, 2=>url, 3=>count,
  #                        4=>timestamp at source, 5=>hostname, 6=>timestamp at union

  if options.bw_cap:
    congest_logger.set_inlink_bwcap(float(options.bw_cap))

  return g

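# Topology built by get_graph above, summarized hop by hop:
#   per source node (loading):  FileRead -> CSVParse -> TimeWarp
#                               [-> URLToDomain] -> local cube
#   per source node (query):    local cube -> (MultiRoundClient if MULTIROUND,
#                               else VariableCoarseningSubscriber)
#                               [-> VariableSampling or WindowLenFilter,
#                                registered as a degradation policy]
#                               -> TimestampOperator -> ExtendOperator(hostname)
#   at the root:                [MultiRoundCoord ->] AvgCongestLogger
#                               -> TimestampOperator -> central cube
#                               [-> TimeSubscriber -> Echo]
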
def get_graph(source_nodes, root_node, options):
  ECHO_RESULTS = not options.no_echo
  ANALYZE = not options.load_only
  LOADING = not options.analyze_only

  g = jsapi.QueryGraph()
  start_ts = parse_ts(options.start_ts)

  congest_logger = jsapi.AvgCongestLogger(g)
  congest_logger.instantiate_on(root_node)

  global_respcodes = g.add_cube("global_respcodes")
  define_schema_for_cube(global_respcodes)
  global_respcodes.instantiate_on(root_node)

  global_ratios = g.add_cube("global_ratios")
  define_schema_for_cube(global_ratios)
  global_ratios.add_agg("ratio", jsapi.Cube.AggType.MIN_D, 4)
  global_ratios.instantiate_on(root_node)

  pull_resp = jsapi.TimeSubscriber(g, {}, 1000)
  pull_resp.set_cfg("ts_field", 0)
  pull_resp.set_cfg("start_ts", start_ts)
  pull_resp.set_cfg("rollup_levels", "8,1,1")
  pull_resp.set_cfg("simulation_rate", 1)
  pull_resp.set_cfg("window_offset", 5 * 1000)

  compute_ratio = jsapi.SeqToRatio(g, url_field=2, total_field=3,
                                   respcode_field=1)

  g.chain([congest_logger, global_respcodes, pull_resp, compute_ratio,
           global_ratios])

  if ECHO_RESULTS:
    pull_q = jsapi.TimeSubscriber(g, {}, 1000, num_results=5,
                                  sort_order="-ratio")
    pull_q.set_cfg("ts_field", 0)
    pull_q.set_cfg("start_ts", start_ts)
    pull_q.set_cfg("rollup_levels", "8,1,1")
    pull_q.set_cfg("simulation_rate", 1)
    pull_q.set_cfg("window_offset", 12 * 1000)  # but trailing by a few seconds

    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)
    g.chain([global_ratios, pull_q, echo])

  parsed_field_offsets = [coral_fidxs['timestamp'], coral_fidxs['HTTP_stat'],
                          coral_fidxs['URL_requested'], len(coral_fidxs)]

  for node, i in numbered(source_nodes, False):
    table_prefix = "local_coral_respcodes"
    table_prefix += "_" + options.warp_factor
    local_cube = g.add_cube(table_prefix + ("_%d" % i))
    define_schema_for_cube(local_cube, parsed_field_offsets)

    if LOADING:
      f = jsapi.FileRead(g, options.fname, skip_empty=True)
      csvp = jsapi.CSVParse(g, coral_types)
      csvp.set_cfg("discard_off_size", "true")
      round = jsapi.TimeWarp(g, field=1, warp=options.warp_factor)
      round.set_cfg("wait_for_catch_up", "true")
      f.instantiate_on(node)
      url_to_dom = jsapi.URLToDomain(g, field=coral_fidxs['URL_requested'])
      g.chain([f, csvp, round, url_to_dom, local_cube])
    else:
      local_cube.set_overwrite(False)

    query_rate = 1000 if ANALYZE else 3600 * 1000
    pull_from_local = jsapi.TimeSubscriber(g, {}, query_rate)
    pull_from_local.set_cfg("simulation_rate", 1)
    pull_from_local.set_cfg("ts_field", 0)
    pull_from_local.set_cfg("start_ts", start_ts)
    pull_from_local.set_cfg("window_offset", 2000)  # but trailing by a few seconds
    pull_from_local.instantiate_on(node)

    local_cube.instantiate_on(node)
    g.chain([local_cube, pull_from_local, congest_logger])

  return g

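# Dataflow for the response-code/ratio query above:
#   per source node:  FileRead -> CSVParse -> TimeWarp -> URLToDomain
#                     -> local cube -> TimeSubscriber -> AvgCongestLogger
#   at the root:      AvgCongestLogger -> global_respcodes -> TimeSubscriber
#                     -> SeqToRatio -> global_ratios
#                     [-> TimeSubscriber -> Echo]
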
def get_graph(source_nodes, root_node, options):
  g = jsapi.QueryGraph()

  ANALYZE = not options.load_only
  LOADING = not options.analyze_only
  ECHO_RESULTS = not options.no_echo
  MULTIROUND = options.multiround
  HASH_SAMPLE = options.hash_sample

  if not LOADING and not ANALYZE:
    print "can't disable both loading and analysis"
    sys.exit(0)

  start_ts = parse_ts(options.start_ts)

  central_cube = g.add_cube("global_coral_ua")
  central_cube.instantiate_on(root_node)
  define_cube(central_cube)

  if ECHO_RESULTS:
    pull_q = jsapi.TimeSubscriber(g, {}, 5000, sort_order="-count",
                                  num_results=10)
    pull_q.set_cfg("ts_field", 0)
    pull_q.set_cfg("start_ts", start_ts)
    pull_q.set_cfg("rollup_levels", "8,1")
    pull_q.set_cfg("simulation_rate", 1)
    pull_q.set_cfg("window_offset", 6 * 1000)  # but trailing by a few seconds

    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)
    g.chain([central_cube, pull_q, echo])

  congest_logger = jsapi.AvgCongestLogger(g)
  congest_logger.instantiate_on(root_node)
  congest_logger.set_cfg("field", 3)

  if MULTIROUND:
    tput_merge = jsapi.MultiRoundCoord(g)
    tput_merge.set_cfg("start_ts", start_ts)
    tput_merge.set_cfg("window_offset", 5 * 1000)
    tput_merge.set_cfg("ts_field", 0)
    tput_merge.set_cfg("num_results", 10)
    tput_merge.set_cfg("sort_column", "-count")
    tput_merge.set_cfg("min_window_size", 5)
    # tput_merge.set_cfg("rollup_levels", "8,1")  # roll up time
    tput_merge.instantiate_on(root_node)
    if ECHO_RESULTS:  # pull_q exists only when echoing results
      pull_q.set_cfg("window_offset", 10 * 1000)  # but trailing by a few seconds
    g.connect(tput_merge, congest_logger)

  parsed_field_offsets = [coral_fidxs['timestamp'], coral_fidxs['HTTP_stat'],
                          coral_fidxs['URL_requested'], len(coral_types)]

  for node, i in numbered(source_nodes, not LOADING):
    table_prefix = "local_coral_ua"
    table_prefix += "_" + options.warp_factor
    local_cube = g.add_cube(table_prefix + ("_%d" % i))
    define_cube(local_cube, parsed_field_offsets)
    print "cube output dimensions:", local_cube.get_output_dimensions()

    if LOADING:
      f = jsapi.FileRead(g, options.fname, skip_empty=True)
      csvp = jsapi.CSVParse(g, coral_types)
      csvp.set_cfg("discard_off_size", "true")
      round = jsapi.TimeWarp(g, field=1, warp=options.warp_factor)
      if not options.full_url:
        url_to_dom = jsapi.URLToDomain(g, field=coral_fidxs['URL_requested'])
        g.chain([f, csvp, round, url_to_dom, local_cube])
      else:
        g.chain([f, csvp, round, local_cube])
      f.instantiate_on(node)
    else:
      local_cube.set_overwrite(False)

    if MULTIROUND:
      pull_from_local = jsapi.MultiRoundClient(g)
    else:
      query_rate = 1000 if ANALYZE else 3600 * 1000
      pull_from_local = jsapi.VariableCoarseningSubscriber(g, {}, query_rate)
      pull_from_local.set_cfg("simulation_rate", 1)
      pull_from_local.set_cfg("max_window_size", options.max_rollup)
      pull_from_local.set_cfg("ts_field", 0)
      pull_from_local.set_cfg("start_ts", start_ts)
      pull_from_local.set_cfg("window_offset", 2000)  # but trailing by a few seconds
    pull_from_local.instantiate_on(node)

    local_cube.instantiate_on(node)
    g.chain([local_cube, pull_from_local, congest_logger])

  g.chain([congest_logger, central_cube])
  return g

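# ---------------------------------------------------------------------------
# Hypothetical driver options. The option names below are exactly the
# attributes the get_graph variants above read; the use of optparse, the
# defaults, and the help strings are assumptions (the real scripts may share
# a common option-parser helper instead).
from optparse import OptionParser

def standard_options():
  parser = OptionParser()
  parser.add_option("--fname", dest="fname",
                    help="CoralCDN trace file for FileRead")
  parser.add_option("--start_ts", dest="start_ts",
                    help="start timestamp passed to parse_ts")
  parser.add_option("--warp_factor", dest="warp_factor", default="1",
                    help="TimeWarp factor; also embedded in cube names, so kept as a string")
  parser.add_option("--cube_name", dest="cube_name", default="local_coral_raw",
                    help="name of the raw cube (default is a guess)")
  parser.add_option("--latencylog", dest="latencylog",
                    help="file for add_latency_measure output")
  parser.add_option("--max_rollup", dest="max_rollup", type="int", default=5,
                    help="max_window_size for VariableCoarseningSubscriber")
  parser.add_option("--steps", dest="steps",
                    help="degradation steps for VariableSampling")
  parser.add_option("--bw_cap", dest="bw_cap",
                    help="bandwidth cap for the root's in-links")
  parser.add_option("--full_url", action="store_true", dest="full_url", default=False)
  parser.add_option("--load_only", action="store_true", dest="load_only", default=False)
  parser.add_option("--analyze_only", action="store_true", dest="analyze_only", default=False)
  parser.add_option("--no_echo", action="store_true", dest="no_echo", default=False)
  parser.add_option("--multiround", action="store_true", dest="multiround", default=False)
  parser.add_option("--hash_sample", action="store_true", dest="hash_sample", default=False)
  parser.add_option("--local_thresh", action="store_true", dest="local_thresh", default=False)
  return parser

# Usage sketch:
#   (options, args) = standard_options().parse_args()
#   g = get_graph(source_nodes, root_node, options)  # nodes come from the controller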