def experiment_changing_input_size(repetitions=100, max_input_size=50):
    """Run the query suite on one synthetic graph for growing input sizes.

    A single network is generated once and reused for the whole experiment.
    For every input size ``i`` in ``1..max_input_size`` the pairs returned by
    ``fn.fields_degree(i)`` are converted into an input DRS, the queries are
    executed through ``run_all_queries`` and the three series it returns
    (q2, q3, q4) are summarised with ``get_percentiles``.

    Args:
        repetitions: Passed as the first argument to ``run_all_queries``
            (presumably the number of runs per query -- confirm there).
        max_input_size: Largest value (inclusive) passed to
            ``fn.fields_degree``. Defaults to 50, which is the range the
            function has always covered.

    Returns:
        dict mapping each input size ``i`` to the ``get_percentiles`` result
        computed from ``[q2, q3, q4]``.
    """
    # Create the graph once; every input size is measured on the same network.
    fn = syn.generate_network_with(
        num_nodes=100000,
        num_nodes_per_table=10,
        num_schema_sim=90000,
        num_content_sim=90000,
        num_pkfk=90000,
    )
    api = API(fn)

    perf_results = {}
    # Input size from 1 to max_input_size (inclusive).
    for input_size in range(1, max_input_size + 1):
        nodes = fn.fields_degree(input_size)
        # fields_degree yields pairs; only the first element (the id) is used.
        nids = [nid for nid, _ in nodes]
        info = fn.get_info_for(nids)
        hits = fn.get_hits_from_info(info)
        in_drs = api.drs_from_hits(hits)

        q2, q3, q4 = run_all_queries(repetitions, api_obj=api, in_drs_obj=in_drs)
        perf_results[input_size] = get_percentiles([q2, q3, q4])
    return perf_results
def experiment_changing_max_hops_tc_queries(repetitions=100):
    """Time ``api.traverse`` over ``Relation.SCHEMA_SIM`` for max_hops 1..10.

    For each ``max_hops`` value a synthetic network is generated, an input
    DRS is built from the pairs returned by ``fn.fields_degree(1)``, and
    ``api.traverse`` is called ``repetitions`` times. The elapsed time of
    every call is recorded and the series is summarised with
    ``get_percentiles``.

    Args:
        repetitions: Number of timed ``api.traverse`` calls per max_hops value.

    Returns:
        dict mapping each ``max_hops`` value (1..10) to the
        ``get_percentiles`` result for its list of elapsed times in seconds.
    """
    perf_results = {}
    for max_hops in range(1, 11):
        # NOTE(review): a new network is generated for every max_hops value
        # with identical parameters. If generation is randomised, each data
        # point is measured on a different graph -- confirm this is intended
        # before hoisting it out of the loop.
        fn = syn.generate_network_with(
            num_nodes=100000,
            num_nodes_per_table=10,
            num_schema_sim=100000,
            num_content_sim=100000,
            num_pkfk=100000,
        )
        api = API(fn)

        nodes = fn.fields_degree(1)
        # fields_degree yields pairs; only the first element (the id) is used.
        nids = [nid for nid, _ in nodes]
        info = fn.get_info_for(nids)
        hits = fn.get_hits_from_info(info)
        in_drs = api.drs_from_hits(hits)

        query_times = []
        for _ in range(repetitions):
            # perf_counter is monotonic and high-resolution, unlike time.time(),
            # which can jump when the system clock is adjusted.
            start = time.perf_counter()
            api.traverse(in_drs, Relation.SCHEMA_SIM, max_hops=max_hops)
            query_times.append(time.perf_counter() - start)

        perf_results[max_hops] = get_percentiles([query_times])
    return perf_results
def test():
    """Run the query suite once on a small synthetic graph and print percentiles.

    Generates a 100-node network, builds an input DRS from the pairs returned
    by ``fields_degree(3)``, executes ``run_all_queries`` with 100 as its
    first argument, and prints the 5th, 50th and 95th percentile of each of
    the three returned series (q2, q3, q4), one line per series.
    """
    network = syn.generate_network_with(
        num_nodes=100,
        num_nodes_per_table=10,
        num_schema_sim=200,
        num_content_sim=150,
        num_pkfk=50,
    )
    api = API(network)

    # Only the first element of each (id, _) pair is needed downstream.
    seed_ids = [node_id for node_id, _ in network.fields_degree(3)]
    seed_info = network.get_info_for(seed_ids)
    in_drs = api.drs_from_hits(network.get_hits_from_info(seed_info))

    q2, q3, q4 = run_all_queries(100, api_obj=api, in_drs_obj=in_drs)

    # Same report line for every query: "<name>: p5 - p50 - p95".
    for prefix, samples in (("q2: ", q2), ("q3: ", q3), ("q4: ", q4)):
        values = np.array(samples)
        p5, p50, p95 = (np.percentile(values, rank) for rank in (5, 50, 95))
        print(prefix + str(p5) + " - " + str(p50) + " - " + str(p95))
def experiment_changing_graph_density_constant_size(repetitions=10):
    """Run the query suite on fixed-size graphs of increasing density.

    The node count is held at 100000 while the density value -- used for
    ``num_schema_sim``, ``num_content_sim`` and ``num_pkfk`` alike -- steps
    through 100, 1000, 10000, 100000 and 1000000. For each density a network
    is generated, an input DRS is built from the pairs returned by
    ``fields_degree(3)``, and the three series returned by
    ``run_all_queries`` are summarised with ``get_percentiles``.

    Args:
        repetitions: Passed as the first argument to ``run_all_queries``.

    Returns:
        dict mapping each density value to the ``get_percentiles`` result
        computed from ``[q2, q3, q4]``.
    """
    node_count = 100000
    results_by_density = dict()

    for edge_count in (100, 1000, 10000, 100000, 1000000):
        network = syn.generate_network_with(
            num_nodes=node_count,
            num_nodes_per_table=10,
            num_schema_sim=edge_count,
            num_content_sim=edge_count,
            num_pkfk=edge_count,
        )
        api = API(network)

        # Keep only the first element of each pair yielded by fields_degree.
        seed_ids = [node_id for node_id, _ in network.fields_degree(3)]
        seed_info = network.get_info_for(seed_ids)
        in_drs = api.drs_from_hits(network.get_hits_from_info(seed_info))

        series_q2, series_q3, series_q4 = run_all_queries(
            repetitions, api_obj=api, in_drs_obj=in_drs
        )
        results_by_density[edge_count] = get_percentiles(
            [series_q2, series_q3, series_q4]
        )

    return results_by_density