def run(self):
    """Build one query (FROM + JOIN parts) per table in ``self.tables_of_from``.

    For every selected or filtered column that does not belong to the
    current FROM table, the first table owning that column is added to the
    join and the link path returned by ``self.get_link`` is recorded.

    The outcome is stored in ``self.queries``: a list of query objects on
    success, or a single ``ParsingException`` instance (not raised) as soon
    as one column cannot be reached from the FROM table.
    """
    self.queries = []
    for table_of_from in self.tables_of_from:
        query = Query()
        query.set_from(From(table_of_from))
        join_object = Join()
        links = []

        # Select columns are processed first, then where columns.
        for requested_columns in (self.columns_of_select, self.columns_of_where):
            for column in requested_columns:
                if column in self.database_dico[table_of_from]:
                    # Column lives in the FROM table itself: no join needed.
                    continue

                foreign_table = self.get_tables_of_column(column)[0]
                join_object.add_table(foreign_table)
                link = self.get_link(table_of_from, foreign_table)
                if not link:
                    # Report the failure through the result slot and stop.
                    self.queries = ParsingException(
                        "There is at least column `" + column +
                        "` that is unreachable from table `" +
                        table_of_from.upper() + "`!")
                    return
                links.extend(link)

        join_object.set_links(self.unique_ordered(links))
        query.set_join(join_object)
        self.queries.append(query)
def parse_sentence(self, sentence, stopwordsFilter=None):
    """Turn a natural-language sentence into a list of query objects.

    The sentence is optionally passed through ``stopwordsFilter.filter``
    and then scanned twice against the table and column names (and their
    ``equivalences``) exposed by ``self.database_object``:

    1. a first pass isolates the tail of the sentence that follows the
       table/column mentions and extracts the literal values that follow
       an assignment keyword (equal, like, greater, less, negation
       keywords, ``:`` or ``=``);
    2. a second pass splits the words into select / from / where phrases.

    The where phrase is then split on order-by and group-by keywords and
    the pieces are handed to the ``SelectParser``, ``FromParser``,
    ``WhereParser``, ``GroupByParser`` and ``OrderByParser`` threads.

    Side effect: sets ``sys.tracebacklimit = 0`` for the whole process.

    Args:
        sentence: The sentence to parse.
        stopwordsFilter: Optional object exposing ``filter(sentence)``;
            when given, its result replaces ``sentence``.

    Returns:
        The list produced by ``FromParser.join()``, each element completed
        with its select, where, group-by and order-by parts.

    Raises:
        ParsingException: If no table/column keyword is found, if no table
            name remains in the from phrase, if starting or joining a
            parser thread raises, or if ``FromParser`` reports one.
    """
    sys.tracebacklimit = 0  # Remove traceback from Exception

    number_of_table = 0
    number_of_select_column = 0
    number_of_where_column = 0
    last_table_position = 0
    columns_of_select = []
    columns_of_where = []

    if stopwordsFilter is not None:
        sentence = stopwordsFilter.filter(sentence)

    # Strip trailing punctuation, but keep quotes: they delimit values.
    input_for_finding_value = sentence.rstrip(
        string.punctuation.replace('"', '').replace("'", ""))
    columns_of_values_of_where = []

    filter_list = [",", "!"]
    for filter_element in filter_list:
        input_for_finding_value = input_for_finding_value.replace(
            filter_element, " ")

    input_word_list = input_for_finding_value.split()

    number_of_where_column_temp = 0
    number_of_table_temp = 0
    last_table_position_temp = 0
    start_phrase = ''
    med_phrase = ''

    # TODO: merge this part of the algorithm (detection of values of where)
    # in the rest of the parsing algorithm.
    #
    # First pass: locate the words before the first table (start_phrase)
    # and the words up to the last table seen before the first column
    # (med_phrase); whatever follows is where values are searched.
    for i in range(0, len(input_word_list)):
        for table_name in self.database_dico:
            if (input_word_list[i] == table_name) or (
                    input_word_list[i] in self.database_object.
                    get_table_by_name(table_name).equivalences):
                if number_of_table_temp == 0:
                    start_phrase = input_word_list[:i]
                number_of_table_temp += 1
                last_table_position_temp = i

            columns = self.database_object.get_table_by_name(
                table_name).get_columns()
            for column in columns:
                if (input_word_list[i] == column.name) or (
                        input_word_list[i] in column.equivalences):
                    if number_of_where_column_temp == 0:
                        med_phrase = input_word_list[
                            len(start_phrase):last_table_position_temp + 1]
                    number_of_where_column_temp += 1
                    break
                else:
                    if (number_of_table_temp != 0) and (
                            number_of_where_column_temp == 0) and (
                            i == (len(input_word_list) - 1)):
                        med_phrase = input_word_list[len(start_phrase):]
            else:
                # No column matched for this table: try the next table.
                continue
            # A column matched: stop looking at other tables for this word.
            break

    end_phrase = input_word_list[len(start_phrase) + len(med_phrase):]
    irext = ' '.join(end_phrase)

    # TODO: move this part of the algorithm (detection of values of where)
    # into the WhereParser thread.
    if irext:
        irext = self.remove_accents(irext.lower())

        filter_list = [",", "!"]
        for filter_element in filter_list:
            irext = irext.replace(filter_element, " ")

        # Concatenation builds a new list, so the keyword lists on self
        # are not modified by the appends below.
        assignment_list = (self.equal_keywords + self.like_keywords +
                           self.greater_keywords + self.less_keywords +
                           self.negation_keywords)
        # Custom operators added as they can also act as assigners.
        assignment_list.append(':')
        assignment_list.append('=')

        # Algorithmic logic for best substitution for extraction of values
        # with the help of assigners.
        assignment_list = self.transformation_sort(assignment_list)

        # Sentinel tokens substituted for assignment keywords.
        maverickjoy_general_assigner = "*res*@3#>>*"
        maverickjoy_like_assigner = "*like*@3#>>*"

        for assigner in assignment_list:
            # Assigners are matched surrounded by spaces so that they are
            # only replaced when they are independent words, never when
            # they are a fragment of a larger word (e.g. the "like" in a
            # sentence such as "... where I like to risk my life is Pune").
            if assigner in self.like_keywords:
                assigner = str(" " + assigner + " ")
                irext = irext.replace(
                    assigner, str(" " + maverickjoy_like_assigner + " "))
            else:
                assigner = str(" " + assigner + " ")
                irext = irext.replace(
                    assigner, str(" " + maverickjoy_general_assigner + " "))

        # Replace all spaces inside quoted values with <_> so each value
        # survives the split below as a single token,
        # e.g. (where name is 'abc def') -> (where name is abc<_>def)
        for i in re.findall("(['\"].*?['\"])", irext):
            irext = irext.replace(
                i, i.replace(' ', '<_>').replace("'", '').replace('"', ''))

        irext_list = irext.split()

        # The token right after a sentinel is the value, unless it is
        # itself a sentinel.
        for idx, x in enumerate(irext_list):
            index = idx + 1
            if x == maverickjoy_like_assigner:
                if (index < len(irext_list)
                        and irext_list[index] != maverickjoy_like_assigner
                        and irext_list[index] != maverickjoy_general_assigner):
                    # Put the spaces back in place of <_>.
                    columns_of_values_of_where.append(
                        str("'%" + str(irext_list[index]).replace('<_>', ' ') + "%'"))

            if x == maverickjoy_general_assigner:
                if (index < len(irext_list)
                        and irext_list[index] != maverickjoy_like_assigner
                        and irext_list[index] != maverickjoy_general_assigner):
                    # Put the spaces back in place of <_>.
                    columns_of_values_of_where.append(
                        str("'" + str(irext_list[index]).replace('<_>', ' ') + "'"))

    # ------------------------------------------------------------------
    # Second pass: split the sentence into select / from / where phrases.
    # ------------------------------------------------------------------
    tables_of_from = []
    select_phrase = ''
    from_phrase = ''
    where_phrase = ''

    words = re.findall(r"[\w]+", self.remove_accents(sentence))

    for i in range(0, len(words)):
        for table_name in self.database_dico:
            if (words[i] == table_name) or (
                    words[i] in self.database_object.get_table_by_name(
                        table_name).equivalences):
                if number_of_table == 0:
                    select_phrase = words[:i]
                tables_of_from.append(table_name)
                number_of_table += 1
                last_table_position = i

            columns = self.database_object.get_table_by_name(
                table_name).get_columns()
            for column in columns:
                if (words[i] == column.name) or (words[i] in column.equivalences):
                    if number_of_table == 0:
                        # Column seen before any table: a selected column.
                        columns_of_select.append(column.name)
                        number_of_select_column += 1
                    else:
                        # Column seen after a table: a where column.
                        if number_of_where_column == 0:
                            from_phrase = words[
                                len(select_phrase):last_table_position + 1]
                        columns_of_where.append(column.name)
                        number_of_where_column += 1
                    break
                else:
                    if (number_of_table != 0) and (
                            number_of_where_column == 0) and (
                            i == (len(words) - 1)):
                        from_phrase = words[len(select_phrase):]

    where_phrase = words[len(select_phrase) + len(from_phrase):]

    if (number_of_select_column + number_of_table + number_of_where_column) == 0:
        raise ParsingException("No keyword found in sentence!")

    if len(tables_of_from) > 0:
        # Cut the from phrase into chunks, each ending on a table word.
        from_phrases = []
        previous_index = 0
        for i in range(0, len(from_phrase)):
            for table in tables_of_from:
                if (from_phrase[i] == table) or (
                        from_phrase[i] in self.database_object.
                        get_table_by_name(table).equivalences):
                    from_phrases.append(from_phrase[previous_index:i + 1])
                    previous_index = i + 1

        # Index of the last chunk containing a junction/disjunction word.
        last_junction_word_index = -1
        for i in range(0, len(from_phrases)):
            number_of_junction_words = 0
            number_of_disjunction_words = 0

            for word in from_phrases[i]:
                if word in self.junction_keywords:
                    number_of_junction_words += 1
                if word in self.disjunction_keywords:
                    number_of_disjunction_words += 1

            if (number_of_junction_words + number_of_disjunction_words) > 0:
                last_junction_word_index = i

        # Chunks after the cut-off are moved to the front of the where
        # phrase; sum(..., []) flattens the list of chunks.
        if last_junction_word_index == -1:
            from_phrase = sum(from_phrases[:1], [])
            where_phrase = sum(from_phrases[1:], []) + where_phrase
        else:
            from_phrase = sum(from_phrases[:last_junction_word_index + 1], [])
            where_phrase = sum(
                from_phrases[last_junction_word_index + 1:], []) + where_phrase

    # Keep only the tables that are still mentioned in the from phrase.
    real_tables_of_from = []
    for word in from_phrase:
        for table in tables_of_from:
            if (word == table) or (
                    word in self.database_object.
                    get_table_by_name(table).equivalences):
                real_tables_of_from.append(table)

    tables_of_from = real_tables_of_from

    if len(tables_of_from) == 0:
        raise ParsingException("No table name found in sentence!")

    # Split the where phrase on order-by / group-by keywords.
    # previous_phrase_type: 0 = where, 1 = order by, 2 = group by.
    group_by_phrase = []
    order_by_phrase = []
    new_where_phrase = []
    previous_index = 0
    previous_phrase_type = 0
    yet_where = 0

    for i in range(0, len(where_phrase)):
        if where_phrase[i] in self.order_by_keywords:
            if yet_where > 0:
                if previous_phrase_type == 1:
                    order_by_phrase.append(where_phrase[previous_index:i])
                elif previous_phrase_type == 2:
                    group_by_phrase.append(where_phrase[previous_index:i])
            else:
                # First keyword met: what precedes it is the where part.
                new_where_phrase.append(where_phrase[previous_index:i])
            previous_index = i
            previous_phrase_type = 1
            yet_where += 1
        if where_phrase[i] in self.group_by_keywords:
            if yet_where > 0:
                if previous_phrase_type == 1:
                    order_by_phrase.append(where_phrase[previous_index:i])
                elif previous_phrase_type == 2:
                    group_by_phrase.append(where_phrase[previous_index:i])
            else:
                new_where_phrase.append(where_phrase[previous_index:i])
            previous_index = i
            previous_phrase_type = 2
            yet_where += 1

    # Flush the trailing segment into the bucket it belongs to.
    if previous_phrase_type == 1:
        order_by_phrase.append(where_phrase[previous_index:])
    elif previous_phrase_type == 2:
        group_by_phrase.append(where_phrase[previous_index:])
    else:
        new_where_phrase.append(where_phrase)

    try:
        select_parser = SelectParser(
            columns_of_select, tables_of_from, select_phrase,
            self.count_keywords, self.sum_keywords, self.average_keywords,
            self.max_keywords, self.min_keywords, self.distinct_keywords,
            self.database_dico, self.database_object)
        from_parser = FromParser(tables_of_from, columns_of_select,
                                 columns_of_where, self.database_object)
        where_parser = WhereParser(
            new_where_phrase, tables_of_from, columns_of_values_of_where,
            self.count_keywords, self.sum_keywords, self.average_keywords,
            self.max_keywords, self.min_keywords, self.greater_keywords,
            self.less_keywords, self.between_keywords,
            self.negation_keywords, self.junction_keywords,
            self.disjunction_keywords, self.like_keywords,
            self.distinct_keywords, self.database_dico,
            self.database_object)
        group_by_parser = GroupByParser(group_by_phrase, tables_of_from,
                                        self.database_dico,
                                        self.database_object)
        order_by_parser = OrderByParser(order_by_phrase, tables_of_from,
                                        self.asc_keywords,
                                        self.desc_keywords,
                                        self.database_dico,
                                        self.database_object)

        select_parser.start()
        from_parser.start()
        where_parser.start()
        group_by_parser.start()
        order_by_parser.start()

        queries = from_parser.join()
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not disguised as parsing errors.
        raise ParsingException("Parsing error occured in thread!")

    # FromParser hands back a ParsingException instance instead of raising.
    if isinstance(queries, ParsingException):
        raise queries

    try:
        select_objects = select_parser.join()
        where_objects = where_parser.join()
        group_by_objects = group_by_parser.join()
        order_by_objects = order_by_parser.join()
    except Exception:
        raise ParsingException("Parsing error occured in thread!")

    # The i-th result of every parser completes the i-th query.
    for i in range(0, len(queries)):
        query = queries[i]
        query.set_select(select_objects[i])
        query.set_where(where_objects[i])
        query.set_group_by(group_by_objects[i])
        query.set_order_by(order_by_objects[i])

    return queries
class FromParser(Thread):
    """Thread computing the FROM and JOIN parts of each candidate query.

    The result is collected with ``join()``: either a list of query
    objects, or a ``ParsingException`` instance when a requested column
    cannot be reached from one of the FROM tables.
    """

    def __init__(self, tables_of_from, columns_of_select, columns_of_where,
                 database_object):
        super(FromParser, self).__init__()
        self.queries = []
        self.tables_of_from = tables_of_from
        self.columns_of_select = columns_of_select
        self.columns_of_where = columns_of_where
        self.database_object = database_object
        # Maps each table name to the collection of its column names.
        self.database_dico = self.database_object.get_tables_into_dictionary()

    def get_tables_of_column(self, column):
        """Return the names of every table that contains ``column``."""
        return [
            table for table in self.database_dico
            if column in self.database_dico[table]
        ]

    def intersect(self, a, b):
        """Return the elements common to ``a`` and ``b`` (order unspecified)."""
        common = set(a) & set(b)
        return list(common)

    def difference(self, a, b):
        """Return the elements of ``a`` absent from ``b``, in ``a``'s order."""
        return [item for item in a if item not in b]

    def is_direct_join_is_possible(self, table_src, table_trg):
        """Return the foreign-key link between two tables, if there is one.

        The link is ``[(table_src, column), (table_trg, column)]``; the
        foreign key may be declared on either side. Returns ``None`` when
        neither table references the other. (A primary-key based fallback
        existed here only as disabled code.)
        """
        src_foreign_keys = self.database_object.get_foreign_keys_of_table(
            table_src)
        trg_foreign_keys = self.database_object.get_foreign_keys_of_table(
            table_trg)

        # Foreign key declared on the source table, pointing at the target.
        for column in src_foreign_keys:
            reference = column.is_foreign()
            if reference['foreign_table'] == table_trg:
                return [(table_src, column.name),
                        (table_trg, reference['foreign_column'])]

        # Foreign key declared on the target table, pointing at the source.
        for column in trg_foreign_keys:
            reference = column.is_foreign()
            if reference['foreign_table'] == table_src:
                return [(table_src, reference['foreign_column']),
                        (table_trg, column.name)]

        return None

    def get_all_direct_linked_tables_of_a_table(self, table_src):
        """Return the direct links from ``table_src`` to every other table."""
        links = []
        for table_trg in self.database_dico:
            if table_trg == table_src:
                continue
            link = self.is_direct_join_is_possible(table_src, table_trg)
            if link is not None:
                links.append(link)
        return links

    def is_join(self, historic, table_src, table_trg):
        """Search recursively for a chain of links from source to target.

        ``historic`` lists the tables already expanded and is mutated in
        place. Returns ``[0, link]`` for a direct link, the accumulated
        path (``0`` marker first, then links from the target back towards
        the source) for an indirect one, or ``[]`` when nothing is found.
        """
        # Links of a table that was already expanded are discarded, which
        # stops the recursion from looping on cycles.
        candidates = [
            link
            for link in self.get_all_direct_linked_tables_of_a_table(table_src)
            if link[0][0] not in historic
        ]

        for link in candidates:
            if link[1][0] == table_trg:
                return [0, link]

        historic.append(table_src)
        path = []
        for link in candidates:
            sub_path = self.is_join(historic, link[1][0], table_trg)
            if sub_path != []:
                # A later successful neighbour replaces an earlier path.
                path = sub_path
                path.append(link)
        return path

    def get_link(self, table_src, table_trg):
        """Return the ordered links joining two tables, or ``[]`` if none."""
        path = self.is_join([], table_src, table_trg)
        if path:
            del path[0]  # drop the leading 0 marker set by is_join
            path.reverse()  # is_join builds the path target-first
        return path

    def unique(self, _list):
        """Return ``_list`` without duplicates, as lists (order unspecified)."""
        distinct = {tuple(item) for item in _list}
        return [list(item) for item in distinct]

    def unique_ordered(self, _list):
        """Return ``_list`` without duplicates, keeping first occurrences."""
        seen = []
        for item in _list:
            if item not in seen:
                seen.append(item)
        return seen

    def run(self):
        """Build one query per FROM table and store them in ``self.queries``.

        For every selected or filtered column missing from the FROM table,
        the first table owning it is joined and the link path is recorded.
        If a column is unreachable, ``self.queries`` is replaced by a
        ``ParsingException`` instance and the thread stops.
        """
        self.queries = []
        for table_of_from in self.tables_of_from:
            query = Query()
            query.set_from(From(table_of_from))
            join_object = Join()
            links = []

            # Select columns are processed first, then where columns.
            for requested_columns in (self.columns_of_select,
                                      self.columns_of_where):
                for column in requested_columns:
                    if column in self.database_dico[table_of_from]:
                        continue

                    foreign_table = self.get_tables_of_column(column)[0]
                    join_object.add_table(foreign_table)
                    link = self.get_link(table_of_from, foreign_table)
                    if not link:
                        self.queries = ParsingException(
                            "There is at least column `" + column +
                            "` that is unreachable from table `" +
                            table_of_from.upper() + "`!")
                        return
                    links.extend(link)

            join_object.set_links(self.unique_ordered(links))
            query.set_join(join_object)
            self.queries.append(query)

    def join(self):
        """Wait for the thread to finish and return ``self.queries``."""
        Thread.join(self)
        return self.queries