def _parse_select(self, sql):
    """Turn the SELECT clause of ``sql`` into a flat list of grammar actions.

    Grammar covered here::

        Select ::= A | AA | AAA | ...
        A      ::= agg column table

    The list starts with ``Sel(0)`` and ``N(len(select) - 1)``; each selected
    item then contributes an ``A``, a ``C`` and a table entry, in that order.

    Side effects:
        * every column-set index that is used is added to ``self.colSet``;
        * while ``self.copy_selec`` is falsy, it is set to deep copies of the
          column and table entries that were appended last.

    :param sql: mapping providing ``sql['sql']['select']``, ``sql['names']``,
        ``sql['col_set']`` and ``sql['col_table']``.
    :return: tuple ``(actions, None)``.
    """
    select_items = sql['sql']['select'][1]
    actions = [Sel(0), N(len(select_items) - 1)]

    for item in select_items:
        name_idx = item[1][1][1]
        actions.append(A(item[0]))

        # Position of this column's name inside the de-duplicated column set.
        set_idx = sql['col_set'].index(sql['names'][name_idx])
        self.colSet.add(set_idx)
        column_action = C(set_idx)
        actions.append(column_action)

        # Column index 0 is the ``*`` case: its table entry is delegated to
        # ``_parser_column0``; any other column is looked up in ``col_table``.
        if name_idx != 0:
            table_action = T(sql['col_table'][name_idx])
        else:
            table_action = self._parser_column0(sql, select_items)
        actions.append(table_action)

        if not self.copy_selec:
            self.copy_selec = [
                copy.deepcopy(entry)
                for entry in (column_action, table_action)
            ]

    return actions, None
def derive(nodes_type, selected_C):
    """Instantiate one node per requested type and expand each with ``generate``.

    Args:
        nodes_type: iterable of node classes. Supported classes are ``Root1``,
            ``Root``, ``N``, ``A``, ``Sel``, ``Filter``, ``Order`` and ``Sup``.
        selected_C: passed through unchanged to ``generate`` for every node.

    Returns:
        list: the newly created nodes, in the order of ``nodes_type``.

    Raises:
        ValueError: if an entry of ``nodes_type`` is not a supported class.
            Previously such an entry either raised ``UnboundLocalError`` (first
            iteration) or silently re-expanded and re-appended the node built
            for the preceding entry.
    """
    # Classes that are always constructed with the same argument.
    fixed_args = {
        Root1: 3,
        Root: 5,
        # Open question from the original author: N could perhaps instead
        # cover all of the A nodes in selected_A.
        N: 0,
        A: 0,
        Sel: 0,
    }
    # Classes whose constructor argument is drawn uniformly from an
    # inclusive (low, high) range.
    random_arg_ranges = {
        Filter: (2, 10),
        Order: (0, 1),
        Sup: (0, 1),
    }

    nodes = []
    for node_type in nodes_type:
        if node_type in fixed_args:
            node = node_type(fixed_args[node_type])
        elif node_type in random_arg_ranges:
            low, high = random_arg_ranges[node_type]
            node = node_type(random.randint(low, high))
        else:
            raise ValueError(
                'derive() got an unsupported node type: %r' % (node_type,))
        generate(node, selected_C)
        nodes.append(node)
    return nodes
def generate(node, selected_C):
    """Grow a chain of children below ``node`` until an ``A`` node gets a column.

    Expansion steps, checked in this order:

    * ``A``      -> attaches one randomly chosen entry of ``selected_C`` and stops
    * ``Root1``  -> ``Root(5)``
    * ``Root``   -> ``Sel(0)``
    * ``Sel``    -> ``N(0)``
    * ``N`` / ``Order`` / ``Sup`` / ``Filter`` -> ``A(0)``

    Any other node is left untouched. Links are made through the nodes' own
    ``set_parent`` and ``add_children`` methods.

    :param node: node to expand in place.
    :param selected_C: indexable collection of candidate column nodes; one is
        picked with ``random.randint`` when an ``A`` node is reached.
    :return: None
    """
    current = node
    while True:
        if isinstance(current, A):
            # Terminal step: hang a random candidate column under the A node.
            pick = random.randint(0, len(selected_C) - 1)
            column = selected_C[pick]
            column.set_parent(current)
            current.add_children(column)
            return

        if isinstance(current, Root1):
            child = Root(5)
        elif isinstance(current, Root):
            child = Sel(0)
        elif isinstance(current, Sel):
            child = N(0)
        elif isinstance(current, (N, Order, Sup, Filter)):
            child = A(0)
        else:
            # Not a node kind this routine knows how to expand.
            return

        child.set_parent(current)
        current.add_children(child)
        current = child
def generate_sketch(node):
    """Grow the skeleton chain below ``node`` without attaching any columns.

    Expansion stops as soon as the current node is an ``N``, ``Order``, ``Sup``
    or ``Filter``. Otherwise one child is created and linked per step:

    * ``Root1`` -> ``Root(5)``
    * ``Root``  -> ``Sel(0)``
    * ``Sel``   -> ``N(0)``

    Any other node is left untouched.

    :param node: node to expand in place.
    :return: None
    """
    current = node
    while not isinstance(current, (N, Order, Sup, Filter)):
        if isinstance(current, Root1):
            child = Root(5)
        elif isinstance(current, Root):
            child = Sel(0)
        elif isinstance(current, Sel):
            child = N(0)
        else:
            # Not a node kind this routine knows how to expand.
            return

        child.set_parent(current)
        current.add_children(child)
        current = child
def _parse_select(self, sql, parent, history_utterance):
    """
    Parse the SELECT clause of ``sql`` into a flat list of grammar actions.

    Grammar:
        Select ::= A | AA | AAA | ... |
        A ::= agg column table

    The list starts with ``Sel(0, parent)`` and ``N(len(select) - 1, sub_parent)``
    where ``sub_parent = action_map[Sel]``; every selected item then adds an
    ``A`` and a ``TC`` entry. When ``_parser_column0`` returns ``-1`` the
    FROM clause's first table unit is parsed through ``self.full_parse`` and
    its actions are appended as well.

    Side effects: adds column-set indices to ``self.colSet``, may set
    ``self.copy_selec``, and increments the module-level ``from_T`` counter
    once per nested FROM sub-query that is parsed.

    NOTE(review): the nesting below was reconstructed from a source whose
    whitespace had been collapsed -- confirm it against the original file.

    :param sql: example mapping (see the keys read below).
    :param parent: forwarded to the ``Sel`` constructor.
    :param history_utterance: forwarded to ``self.full_parse``.
    :return: tuple ``(result, None)``
    """
    result = []
    # Example select payload: [[3, [0, [0, 0, False], None]]],
    # i.e. [[aggregation, [0, [0, column id, False], None]]]
    select = sql['sql']['select'][1]
    result.append(Sel(0, parent))
    sub_parent = action_map[Sel]
    result.append(N(len(select) - 1, sub_parent))
    # Example: select == [ [0, [0, [0, 2, False], None]], [0, [0, [0, 6, False], None]] ]
    for sel in select:
        # Example: sel == [0, [0, [0, 2, False], None]]  ->  sel[1][1][1] == 2
        # Store the aggregation: the first element is the aggregation function id (A(none)).
        result.append(A(sel[0], sub_parent))
        # Store the column.
        self.colSet.add(sql['col_set'].index(sql['names'][sel[1][1][1]]))
        col_id = sql['col_set'].index(sql['names'][sel[1][1][1]])
        # now check for the situation with *
        # Store the table. Special case: for `select *` the owning table is
        # inferred from the SQL statement (fairly involved).
        if sel[1][1][1] == 0:
            table_id = self._parser_column0(sql, select)
            if table_id == -1:
                col_id = sql['col_set'].index('*')
            tc_idx = sql['columns_names_embedder_idxes'].index(
                (table_id, col_id))
            result.append(TC(tc_idx, sub_parent))
            # Queries shaped like SELECT COUNT(*) FROM (SELECT COUNT(*) ... ),
            # where FROM is not a table.
            if table_id == -1:
                global from_T
                full_query = {}
                query = {}
                # Schema-level fields are copied over unchanged.
                full_query['names'] = sql['names']
                full_query['col_table'] = sql['col_table']
                full_query['col_set'] = sql['col_set']
                full_query['table_names'] = sql['table_names']
                full_query['keys'] = sql['keys']
                full_query['columns_names_embedder'] = sql[
                    'columns_names_embedder']
                full_query['columns_names_embedder_idxes'] = sql[
                    'columns_names_embedder_idxes']
                full_query['nltk_pos'] = sql['nltk_pos']
                full_query['utterance_arg_type'] = sql[
                    'utterance_arg_type']
                full_query['utterance_arg'] = sql['utterance_arg']
                # print(full_query.keys())
                # print(sql.keys())
                query['utterance_toks'] = sql['utterance_toks']
                # The nested query is taken from the first FROM table unit.
                query['sql'] = sql['sql']['from']['table_units'][0][1]
                # query['query_toks_no_value'] = sql['query_toks_no_value']
                query['query'] = sql['query']
                query['utterance_arg_type'] = sql['utterance_arg_type']
                query['utterance_arg'] = sql['utterance_arg']
                from_T += 1
                from_result = self.full_parse(full_query, query,
                                              history_utterance)
                result.extend(from_result)
        else:
            table_id = sql['col_table'][sel[1][1][1]]
            tc_idx = sql['columns_names_embedder_idxes'].index(
                (table_id, col_id))
            result.append(TC(tc_idx, sub_parent))
        # While copy_selec is still falsy, store deep copies of the last two
        # entries of result.
        # NOTE(review): after a nested FROM parse these are the last two
        # entries of from_result, not the A/TC pair -- confirm this is intended.
        if not self.copy_selec:
            self.copy_selec = [
                copy.deepcopy(result[-2]),
                copy.deepcopy(result[-1])
            ]
    return result, None
for node in new_node_lst: node.parent = None node.children = [] return new_node_lst # print(new_node_lst) # print(action_p) if __name__ == '__main__': # correct_s = "Root1(3) Root(4) Sel(0) N(2) A(0) C(3) T(1) A(0) C(9) T(1) A(0) C(12) T(1) Order(0) A(0) C(12) T(1)".split() correct = [ Root1(3), Root(3), Sel(0), N(0), Filter(0), Filter(0), Filter(2), Root(3), Sel(0), N(0), Filter(2), Filter(2) ] # predicted_s = 'Root1(3) Root(4) Sel(0) N(2) A(0) C(4)'.split() predicted = [ Root1(3), Root(3), Sel(0),