def gen_rule_queries(query_dir, experiment_conf, rule_number, rule):
    '''
    Write every CQL query file derived from a single rule

    With N = rule_number, the files written directly here are rN.cql,
    dN_pref.cql, dN_nonpref.cql and dN.cql (all inside query_dir).  The
    queries for the temporal conditions of the rule are delegated to the
    gen_*_rule_query helpers.
    '''
    prefix = query_dir + os.sep
    rule_id = str(rule_number)

    # Temporal conditions: the first one depends on the rule type
    # NOTE(review): conditions 2 and 3 are taken to be generated for
    # every rule type (only the FIRST/PREV call is conditional) - confirm
    rule_type = rule[TYPE]
    if rule_type == FIRST:
        gen_first_rule_query(query_dir, rule_number)
    elif rule_type == PREV:
        gen_prev_rule_query(query_dir, rule_number, rule, 1)
    gen_someprev_rule_query(query_dir, rule_number, rule, 2)
    gen_allprev_rule_query(query_dir, rule_number, rule, 3)

    # R_i query
    write_to_txt(prefix + 'r' + rule_id + '.cql',
                 COND_QUERY.format(rn=rule_number))

    # Attribute names are needed in three flavours below
    names = get_attribute_list(experiment_conf[ATT])
    # Everything except the sequence identifier (A1)
    non_id_names = names[1:]
    all_atts = ', '.join(non_id_names)
    # Everything except the identifier (A1) and the TUP attributes (A2, A3)
    non_tup_atts = ', '.join(names[3:])

    # D_i Pref query
    pref_text = DI_PREF_QUERY.format(att=all_atts, attnt=non_tup_atts,
                                     cond=rule[COND_SIMPLE],
                                     pref=rule[PREF], rn=rule_number)
    write_to_txt(prefix + 'd' + rule_id + '_pref.cql', pref_text)

    # D_i NonPref query
    nonpref_text = DI_NONPREF_QUERY.format(att=all_atts,
                                           attnt=non_tup_atts,
                                           cond=rule[COND_SIMPLE],
                                           nonpref=rule[NONPREF],
                                           rn=rule_number)
    write_to_txt(prefix + 'd' + rule_id + '_nonpref.cql', nonpref_text)

    # D_i query
    p_columns = ', '.join('p.' + name for name in non_id_names)
    # NOTE(review): the "np" columns are built with the 'p.' prefix,
    # exactly as before - confirm this is intended and not 'np.'
    np_columns = ', '.join('p.' + name + ' AS _' + name
                           for name in non_id_names)
    ceteris = ' AND '.join('p.' + name + ' = np.' + name
                           for name in
                           get_ceteris_attributes(experiment_conf))
    di_text = DI_QUERY.format(p_att=p_columns, np_att=np_columns,
                              rn=rule_number, cet_cond=ceteris)
    write_to_txt(prefix + 'd' + rule_id + '.cql', di_text)
def gen_conseq_stream(configuration, experiment_conf):
    '''
    Generate the data stream file for an experiment

    Every sequence identifier receives a random start timestamp in
    [0, RAN - 1]; records are then produced for each instant from 0 up
    to the maximum data timestamp and stored in the data file.
    '''
    # CSV header (attributes plus timestamp)
    header = get_attribute_list(experiment_conf[ATT],
                                include_timestamp=True)
    # Pair each sequence identifier with its randomized start timestamp
    start_by_id = [
        (seq_id, random.randint(0, experiment_conf[RAN] - 1))
        for seq_id in gen_sequence_id_list(experiment_conf[NSQ])
    ]
    # Last instant to generate (maximum range + maximum slide)
    last_ts = get_max_data_timestamp(configuration[PARAMETER])
    data_file = get_data_file(configuration, experiment_conf)
    # Instant zero creates the file ...
    write_to_csv(data_file, header,
                 gen_conseq_records(configuration, experiment_conf,
                                    start_by_id, 0))
    # ... and the remaining instants are appended to it
    for instant in range(1, last_ts + 1):
        append_to_csv(data_file, header,
                      gen_conseq_records(configuration, experiment_conf,
                                         start_by_id, instant))
def gen_cql_queries(configuration, experiment_conf):
    '''
    Write the set of queries that reproduces the BESTSEQ operator
    using only the original CQL operators

    Besides the transitive TUP file, the following files are written
    into the query directory: z.cql, p_join.cql, p.cql, the per-rule
    queries, t1.cql, the transitive queries, id.cql and equiv.cql.
    '''
    # Transitive tuples file
    tup_file = get_tup_file(configuration)
    gen_transitive_tup(configuration, tup_file)

    query_dir = get_query_dir(configuration, experiment_conf)

    def save(name, text):
        ''' Store a query text in a file of the query directory '''
        write_to_txt(query_dir + os.sep + name, text)

    # z: sequences
    save('z.cql', Z_QUERY.format(ran=experiment_conf[RAN],
                                 sli=experiment_conf[SLI]))

    # Attribute names without the sequence identifier (A1)
    names = get_attribute_list(experiment_conf[ATT])[1:]

    # p_join: join of z positions
    z1_columns = ', '.join('z1.' + name for name in names)
    z2_columns = ', '.join('z2.' + name + ' AS _' + name
                           for name in names)
    save('p_join.cql', P_JOIN_QUERY.format(z1_att=z1_columns,
                                           z2_att=z2_columns))

    # p: positions to be compared (at least one attribute differs)
    differs = ' OR '.join('NOT ' + name + ' = _' + name
                          for name in names)
    save('p.cql', P_QUERY.format(p_filter=differs))

    # Rule queries (R_i and D_i) and t1, the union of every D_i
    selects = []
    for number, rule in enumerate(get_rule_list(configuration,
                                                experiment_conf),
                                  start=1):
        gen_rule_queries(query_dir, experiment_conf, number, rule)
        selects.append('SELECT * FROM d' + str(number))
    save('t1.cql', '\nUNION\n'.join(selects) + ';')

    # T_i queries
    gen_cql_transitive_queries(experiment_conf, query_dir)

    # ID query
    save('id.cql', ID_QUERY.format(rn=experiment_conf[LEV]))

    # Final result
    save('equiv.cql', 'SELECT z.* FROM z, id WHERE z.a1 = id.a1;')
def gen_cql_w_query(query_dir, experiment_conf):
    '''
    Write w.cql, the query for the W relation built from the RANGE and
    SLIDE parameters of the experiment
    '''
    # Comma separated attribute names
    columns = ', '.join(get_attribute_list(experiment_conf[ATT]))
    w_text = CQL_W.format(att=columns,
                          ran=experiment_conf[RAN],
                          sli=experiment_conf[SLI])
    write_to_txt(query_dir + os.sep + 'w.cql', w_text)
def gen_cql_final_query(query_dir, experiment_conf):
    '''
    Write equiv.cql, the final query equivalent to the SEQ operator

    One sub-query is produced for each position from 1 to the range
    parameter (inclusive) and the sub-queries are combined with UNION.
    '''
    # Attribute names qualified with the "w." prefix
    columns = ', '.join(get_attribute_list(experiment_conf[ATT],
                                           prefix='w.'))
    # One final-position query per position of the range
    position_queries = [
        CQL_PI_FINAL.format(pos=pos, att=columns)
        for pos in range(1, experiment_conf[RAN] + 1)
    ]
    # The equivalent query is the union of the final positions
    union_text = '\nUNION\n'.join(position_queries) + ';'
    write_to_txt(query_dir + os.sep + 'equiv.cql', union_text)
def gen_cql_final_query(query_dir, experiment_conf):
    '''
    Write equiv.cql, the final query equivalent to the ENDSEQ operator

    Nothing is done when the file already exists.  Otherwise one
    sub-query is produced for each position from 1 to the range
    parameter (inclusive) and the UNION of them is stored in the file.
    '''
    filename = query_dir + os.sep + 'equiv.cql'
    # An existing file is kept untouched
    if os.path.isfile(filename):
        return
    att_str = ', '.join(get_attribute_list(experiment_conf[ATT]))
    pos_query_list = [
        CQL_EQUIV.format(att=att_str, ran=position)
        for position in range(1, experiment_conf[RAN] + 1)
    ]
    query = '\nUNION\n'.join(pos_query_list) + ';'
    # The context manager closes the file even if the write fails
    with open(filename, 'w') as out_file:
        out_file.write(query)
def gen_stream(configuration, experiment_conf):
    '''
    Generate the data stream file for an experiment

    Records are produced for each instant from 0 up to the maximum data
    timestamp and stored in the data file.
    '''
    # CSV header (attributes plus timestamp)
    header = get_attribute_list(experiment_conf[ATT],
                                include_timestamp=True)
    # Sequence identifiers
    seq_ids = gen_sequence_id_list(experiment_conf[NSQ])
    # Last instant to generate (maximum range + maximum slide)
    last_ts = get_max_data_timestamp(configuration[PARAMETER])
    data_file = get_data_file(configuration, experiment_conf)
    # Instant zero creates the file ...
    write_to_csv(data_file, header,
                 gen_records(configuration, experiment_conf, seq_ids, 0))
    # ... and the remaining instants are appended to it
    for instant in range(1, last_ts + 1):
        append_to_csv(data_file, header,
                      gen_records(configuration, experiment_conf,
                                  seq_ids, instant))
def get_register_stream(configuration, experiment_conf, include_tup=False):
    '''
    Build the text that registers the input stream

    When include_tup is true the registration of the TUP table is
    added after the stream.  The returned text always ends with a
    separator line made of 80 "#" characters.
    '''
    # Attribute declarations: "<name> <INTEGER>" separated by commas
    declarations = ', '.join(
        name + ' ' + INTEGER
        for name in get_attribute_list(experiment_conf[ATT]))
    data_file = get_data_file(configuration, experiment_conf)
    # Stream registration comes first
    pieces = [REG_STREAM_STR.format(atts=declarations, dfile=data_file)]
    if include_tup:
        # TUP table registration, separated by a blank line
        pieces.append('\n\n')
        pieces.append(
            REG_TUP_STR.format(dfile=get_tup_file(configuration)))
    # Closing separator
    pieces.append('\n\n' + '#' * 80 + '\n\n')
    return ''.join(pieces)
def gen_cql_transitive_queries(experiment_conf, query_dir):
    '''
    Write the T_i queries used for transitive comparisons

    One file tN.cql is written for each level N from 2 to the LEV
    parameter (inclusive); each query refers to the previous level.
    '''
    # Attribute names without the sequence identifier (A1)
    names = get_attribute_list(experiment_conf[ATT])[1:]
    p_columns = ', '.join('p.' + name for name in names)
    np_columns = ', '.join('np._' + name for name in names)
    # Join condition linking the "_" attributes of p to those of np
    join_condition = ' AND '.join('p._' + name + ' = np.' + name
                                  for name in names)
    for level in range(2, experiment_conf[LEV] + 1):
        level_text = TI_QUERY.format(prev_n=level - 1,
                                     p_att=p_columns,
                                     np_att=np_columns,
                                     p_np_join=join_condition)
        write_to_txt(query_dir + os.sep + 't' + str(level) + '.cql',
                     level_text)
def gen_cql_queries(configuration, experiment_conf):
    '''
    Write all the CQL queries equivalent to the CONSEQ operator

    Files written into the query directory, in this order:
    table_ots.cql, stream_ots.cql, the z query (by gen_cql_z_query),
    z_prime.cql, p_start.cql, p_end.cql, p_start_end.cql and equiv.cql.
    '''
    query_dir = get_query_dir(configuration, experiment_conf)

    # Fixed queries stored before the z query
    fixed_before_z = (
        ('table_ots.cql', CQL_TABLE_OTS),
        ('stream_ots.cql', CQL_STREAM_OTS),
    )
    for name, text in fixed_before_z:
        write_to_txt(query_dir + os.sep + name, text)

    # z query depends on the experiment parameters
    gen_cql_z_query(query_dir, experiment_conf)

    # Fixed queries stored after the z query
    fixed_after_z = (
        ('z_prime.cql', CQL_Z_PRIME),
        ('p_start.cql', CQL_P_START),
        ('p_end.cql', CQL_P_END),
        ('p_start_end.cql', CQL_P_START_END),
    )
    for name, text in fixed_after_z:
        write_to_txt(query_dir + os.sep + name, text)

    # Final query over the attributes qualified with "z."
    z_columns = ', '.join(get_attribute_list(experiment_conf[ATT], 'z.'))
    write_to_txt(query_dir + os.sep + 'equiv.cql',
                 CQL_EQUIV.format(zatt=z_columns))