Пример #1
0
    def _parse_select(self, sql):
        """
        Turn the SELECT clause of ``sql`` into a flat list of grammar actions.

        Grammar:
            Select ::= A | AA | AAA | ... |
            A ::= agg column table

        The list starts with ``Sel(0)`` and ``N(number of select items - 1)``;
        each select item then contributes an ``A``, a ``C`` and a table action.
        Every column-set index that is emitted is also added to ``self.colSet``.
        While ``self.copy_selec`` is falsy, it is set to deep copies of the last
        two actions appended for the current item.

        :param sql: example dict; reads ``sql['sql']['select']``, ``sql['names']``,
            ``sql['col_set']`` and ``sql['col_table']``
        :return: ``(actions, None)``
        """
        select_items = sql['sql']['select'][1]
        actions = [Sel(0), N(len(select_items) - 1)]

        for item in select_items:
            actions.append(A(item[0]))

            # Position of the selected column inside sql['names'].
            name_idx = item[1][1][1]
            col_set_idx = sql['col_set'].index(sql['names'][name_idx])
            self.colSet.add(col_set_idx)
            actions.append(C(col_set_idx))

            if name_idx != 0:
                table_action = T(sql['col_table'][name_idx])
            else:
                # Index 0 is the "*" situation: the table action comes from
                # the dedicated helper instead of sql['col_table'].
                table_action = self._parser_column0(sql, select_items)
            actions.append(table_action)

            if not self.copy_selec:
                self.copy_selec = [
                    copy.deepcopy(action) for action in actions[-2:]
                ]

        return actions, None
Пример #2
0
def derive(nodes_type, selected_C):
    """Instantiate one node per requested type and expand each with ``generate``.

    Fixed constructor arguments are used for ``Root1`` (3), ``Root`` (5),
    ``N``, ``A`` and ``Sel`` (0). ``Filter``, ``Order`` and ``Sup`` receive a
    random argument drawn with ``random.randint`` (2..10 for ``Filter``,
    0..1 for the other two).

    :param nodes_type: iterable of node classes to instantiate, in order
    :param selected_C: sequence forwarded unchanged to ``generate``
    :return: list of the created (and expanded) nodes, in input order
    :raises ValueError: if an entry of ``nodes_type`` is not one of the
        supported classes. Previously such an entry either failed with an
        unbound-local error (first entry) or silently re-used and re-expanded
        the node created for the previous entry.
    """
    nodes = []
    for node_type in nodes_type:
        if node_type == Root1:
            node = Root1(3)
        elif node_type == Root:
            node = Root(5)
        elif node_type == N:
            # Open question from the original author: perhaps this could
            # instead cover every A contained in selected_A.
            node = N(0)
        elif node_type == A:
            node = A(0)
        elif node_type == Sel:
            node = Sel(0)
        elif node_type == Filter:
            node = Filter(random.randint(2, 10))
        elif node_type == Order:
            node = Order(random.randint(0, 1))
        elif node_type == Sup:
            node = Sup(random.randint(0, 1))
        else:
            # Fail loudly instead of falling through with a stale/unbound node.
            raise ValueError(
                'derive() does not support node type: %r' % (node_type,))

        generate(node, selected_C)
        nodes.append(node)
    return nodes
Пример #3
0
def generate(node, selected_C):
    """Grow a single chain of children below ``node`` until an ``A`` is reached.

    Expansion steps, applied repeatedly to the most recently created child:
    ``Root1 -> Root(5)``, ``Root -> Sel(0)``, ``Sel -> N(0)`` and
    ``N | Order | Sup | Filter -> A(0)``. When the current node is an ``A``,
    a randomly chosen element of ``selected_C`` is attached to it (its parent
    is set to the ``A`` and it is added as the ``A``'s child) and expansion
    stops. A node of any other type is left untouched.

    :param node: node to expand in place
    :param selected_C: sequence from which the terminal child is drawn
    :return: None
    """
    current = node
    while True:
        if isinstance(current, A):
            pick = random.randint(0, len(selected_C) - 1)
            leaf = selected_C[pick]
            leaf.set_parent(current)
            current.add_children(leaf)
            return

        if isinstance(current, Root1):
            successor = Root(5)
        elif isinstance(current, Root):
            successor = Sel(0)
        elif isinstance(current, Sel):
            successor = N(0)
        elif isinstance(current, (N, Order, Sup, Filter)):
            successor = A(0)
        else:
            # Not an expandable node type: nothing to do.
            return

        successor.set_parent(current)
        current.add_children(successor)
        current = successor
Пример #4
0
def generate_sketch(node):
    """Grow the skeleton chain below ``node`` without attaching any ``A`` leaf.

    Expansion steps, applied repeatedly to the most recently created child:
    ``Root1 -> Root(5)``, ``Root -> Sel(0)`` and ``Sel -> N(0)``. Expansion
    stops as soon as the current node is an ``N``, ``Order``, ``Sup`` or
    ``Filter``, or is of any other type that has no expansion step.

    :param node: node to expand in place
    :return: None
    """
    current = node
    while not isinstance(current, (N, Order, Sup, Filter)):
        if isinstance(current, Root1):
            successor = Root(5)
        elif isinstance(current, Root):
            successor = Sel(0)
        elif isinstance(current, Sel):
            successor = N(0)
        else:
            # Unknown node type: leave it as it is.
            return

        successor.set_parent(current)
        current.add_children(successor)
        current = successor
Пример #5
0
    def _parse_select(self, sql, parent, history_utterance):
        """
        Turn the SELECT clause of ``sql`` into a flat list of grammar actions.

        Grammar:
            Select ::= A | AA | AAA | ... |
            A ::= agg column table

        The list starts with ``Sel(0, parent)`` and
        ``N(number of select items - 1, action_map[Sel])``. Each select item
        then contributes an ``A`` and a ``TC`` whose index is the position of
        ``(table_id, col_id)`` in ``sql['columns_names_embedder_idxes']``.
        Every looked-up column-set index is added to ``self.colSet``.

        When the selected name index is 0 and ``self._parser_column0`` returns
        -1, the first FROM table unit is parsed as a nested query through
        ``self.full_parse``; its actions are appended and the module-level
        counter ``from_T`` is incremented.

        While ``self.copy_selec`` is falsy, it is set to deep copies of the
        last two actions in the list at the end of the current item.

        :param sql: example dict (schema fields plus the parsed ``sql`` tree)
        :param parent: second constructor argument for the ``Sel`` action
        :param history_utterance: forwarded unchanged to ``self.full_parse``
        :return: ``(actions, None)``
        """
        global from_T

        # Example shape of sql['sql']['select'][1] (from the original notes):
        #   [[0, [0, [0, 2, False], None]], [0, [0, [0, 6, False], None]]]
        # i.e. each item is [agg id, [0, [0, column id, False], None]].
        select_items = sql['sql']['select'][1]

        actions = [Sel(0, parent)]
        sel_parent = action_map[Sel]
        actions.append(N(len(select_items) - 1, sel_parent))

        for item in select_items:
            # First element of the item is the aggregation function id.
            actions.append(A(item[0], sel_parent))

            # Column: item[1][1][1] indexes sql['names']; record its col_set id.
            name_idx = item[1][1][1]
            col_id = sql['col_set'].index(sql['names'][name_idx])
            self.colSet.add(col_id)

            if name_idx != 0:
                table_id = sql['col_table'][name_idx]
                tc_idx = sql['columns_names_embedder_idxes'].index(
                    (table_id, col_id))
                actions.append(TC(tc_idx, sel_parent))
            else:
                # "*" situation: the owning table has to be inferred from the
                # SQL statement itself (noted as fairly complex by the author).
                table_id = self._parser_column0(sql, select_items)
                from_is_subquery = table_id == -1
                if from_is_subquery:
                    col_id = sql['col_set'].index('*')
                tc_idx = sql['columns_names_embedder_idxes'].index(
                    (table_id, col_id))
                actions.append(TC(tc_idx, sel_parent))

                if from_is_subquery:
                    # Queries such as
                    #   SELECT COUNT(*) FROM (SELECT COUNT(*) ... )
                    # where FROM is not a plain table: parse the inner query.
                    schema_keys = (
                        'names',
                        'col_table',
                        'col_set',
                        'table_names',
                        'keys',
                        'columns_names_embedder',
                        'columns_names_embedder_idxes',
                        'nltk_pos',
                        'utterance_arg_type',
                        'utterance_arg',
                    )
                    full_query = {key: sql[key] for key in schema_keys}
                    query = {
                        'utterance_toks': sql['utterance_toks'],
                        'sql': sql['sql']['from']['table_units'][0][1],
                        'query': sql['query'],
                        'utterance_arg_type': sql['utterance_arg_type'],
                        'utterance_arg': sql['utterance_arg'],
                    }

                    from_T += 1
                    actions.extend(
                        self.full_parse(full_query, query, history_utterance))

            if not self.copy_selec:
                self.copy_selec = [
                    copy.deepcopy(actions[-2]),
                    copy.deepcopy(actions[-1]),
                ]

        return actions, None
Пример #6
0
        for node in new_node_lst:
            node.parent = None
            node.children = []

        return new_node_lst

    # print(new_node_lst)
    # print(action_p)


if __name__ == '__main__':
    # correct_s = "Root1(3) Root(4) Sel(0) N(2) A(0) C(3) T(1) A(0) C(9) T(1) A(0) C(12) T(1) Order(0) A(0) C(12) T(1)".split()
    correct = [
        Root1(3),
        Root(3),
        Sel(0),
        N(0),
        Filter(0),
        Filter(0),
        Filter(2),
        Root(3),
        Sel(0),
        N(0),
        Filter(2),
        Filter(2)
    ]
    # predicted_s = 'Root1(3) Root(4) Sel(0) N(2) A(0) C(4)'.split()
    predicted = [
        Root1(3),
        Root(3),
        Sel(0),