def test_two_tables(self):
     result = format({"select": "*", "from": ["XYZZY", "ABC"]})
     expected = "SELECT * FROM XYZZY, ABC"
     self.assertEqual(result, expected)
 def test_no_arguments(self):
     result = format({"select": {"value": {"now": {}}}})
     expected = "SELECT NOW()"
     self.assertEqual(result, expected)
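These tests exercise moz_sql_parser's format(), which renders the library's JSON query form back into SQL text. A minimal round-trip sketch, assuming moz_sql_parser is installed:

from moz_sql_parser import format, parse

# parse() produces the JSON form the tests above build by hand...
tree = parse("SELECT * FROM XYZZY, ABC")
assert tree == {"select": "*", "from": ["XYZZY", "ABC"]}

# ...and format() renders it back into SQL text
assert format(tree) == "SELECT * FROM XYZZY, ABC"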
 def test_issue_104(self):
     expected = (
         'SELECT NomPropriete AS Categorie, ROUND(AVG(NotePonderee), 2) AS "Moyenne'
         ' des notes", ROUND(AVG(Complexite), 2) AS "Complexite moyenne" FROM'
         " Propriete, Categorie, Jeu WHERE IdPropriete = IdCategorie AND"
         " Categorie.IdJeu = Jeu.IdJeu AND NotePonderee > 0 GROUP BY IdPropriete,"
         ' NomPropriete ORDER BY "Moyenne des notes" DESC,"Complexite moyenne" DESC'
     )
     result = format({
         "select": [
             {
                 "value": "NomPropriete",
                 "name": "Categorie"
             },
             {
                 "value": {
                     "round": [{
                         "avg": "NotePonderee"
                     }, 2]
                 },
                 "name": "Moyenne des notes",
             },
             {
                 "value": {
                     "round": [{
                         "avg": "Complexite"
                     }, 2]
                 },
                 "name": "Complexite moyenne",
             },
         ],
         "from": ["Propriete", "Categorie", "Jeu"],
         "where": {
             "and": [
                 {
                     "eq": ["IdPropriete", "IdCategorie"]
                 },
                 {
                     "eq": ["Categorie.IdJeu", "Jeu.IdJeu"]
                 },
                 {
                     "gt": ["NotePonderee", 0]
                 },
             ]
         },
         "groupby": [{
             "value": "IdPropriete"
         }, {
             "value": "NomPropriete"
         }],
         "orderby": [
             {
                 "value": "Moyenne des notes",
                 "sort": "desc"
             },
             {
                 "value": "Complexite moyenne",
                 "sort": "desc"
             },
         ],
     })
     self.assertEqual(result, expected)
Example #4
 def test_limit(self):
     result = format({"select": "*", "from": "a", "limit": 10})
     expected = "SELECT * FROM a LIMIT 10"
     self.assertEqual(result, expected)
Example #7
    def opine_sql(self, sqlquery, mode='histogram'):
        """
        One test case input (query):
            SELECT h.name
            FROM hotel_amsterdam AS h
            WHERE h.price <= 15
              AND h.opine = 'very clean room';
              
        expected output (pgsql):            
            SELECT
                h.name,
                madlib.logregr_predict_prob(
                generate_features_phrases( -- user-defined aggregate
                           p2v('very clean room'),
                                           -- numeric vectors are passed directly by the parser
                                           -- parser translates the query terms to vectors
                           pvec, senti, count),
                pm.coef) AS score
            FROM
                hotel_amsterdam AS h,
                hotel_amsterdam_phrase_model AS pm, -- single row logistic regression model
                hotel_amsterdam_histogram AS c
            WHERE h.price <= 15
              AND c.attribute = %s(nearest_attr)
              AND h.name = c.hotel_name
            GROUP BY h.name
            ORDER BY score
        """
        # TODOs
        # DONE 1. for each row in table hotel_amsterdam_histogram, add pvec, senti value to the table --
        # NEED CORRECTION!!! 2. finish self.translate
        # 3. write plpython aggregate for generate_feature
        # DONE 4. create phrase_model table and insert the coef into the table
        # 5. (after the deadline) add same for marker, allow multiple entity tables,
        #    remove the need to load histogram and extractions in memory, etc.

        #parser part
        parsed_sql, qterms, where_clause = self.sqlparser(sqlquery)
        nearest_attr = []
        qvec_list = []
        result_row = []

        import copy
        import psycopg2
        from moz_sql_parser import format  # assumed source of format() used below
        conn = psycopg2.connect(
            "host=postgres dbname=postgres user=postgres port=5432")

        #no subjective query, only objective
        if len(qterms) == 0:
            pgsql = format(parsed_sql)
            cur = conn.cursor()
            cur.execute(pgsql)
            rows = cur.fetchall()
            #print(rows)
            result_row.append(rows)

        # compute each qterm column separately
        for qterm in qterms:
            qterm_sql = copy.deepcopy(parsed_sql)
            qterm_where_clause = copy.deepcopy(where_clause)

            qterm = qterm.lower()

            qvec = self.phrase2vec(qterm)
            qvec_list.append(qvec)

            attr_name, _ = self.interpret(qterm)
            nearest_attr.append(attr_name)

            print(qterm, attr_name)

            #translate
            pgsql = self.translate(qterm_sql, qterm, attr_name,
                                   qterm_where_clause)
            #print(pgsql)

            #plpython part
            cur = conn.cursor()
            cur.execute(pgsql)
            rows = cur.fetchall()

            result_row.append(rows)

        #aggregate logic
        result = {}
        for idx, row in enumerate(result_row):
            if row is None:
                continue

            if idx == 0:
                for bid in row:
                    if len(bid) > 1:
                        result[bid[0]] = bid[1]
                    else:
                        result[bid[0]] = 1
            else:
                for bid in row:
                    # fuzzy AND: multiply this term's score in; unseen names score 0
                    result[bid[0]] = result.get(bid[0], 0) * bid[1]

        return sorted(result.items(), key=lambda x: -x[1])
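The aggregation at the end treats each subjective term's per-entity score as a fuzzy truth value and combines terms by multiplication (fuzzy AND), then ranks entities by the combined score. A self-contained sketch of that step, using hypothetical (name, score) rows:

def fuzzy_and(result_rows):
    # result_rows: one list of (name, score) rows per subjective term
    result = {}
    for idx, rows in enumerate(result_rows):
        for name, score in rows:
            if idx == 0:
                result[name] = score  # first term seeds the table
            else:
                # fuzzy AND: multiply; names unseen so far score 0
                result[name] = result.get(name, 0) * score
    return sorted(result.items(), key=lambda x: -x[1])

print(fuzzy_and([
    [('hotel_a', 0.9), ('hotel_b', 0.4)],
    [('hotel_a', 0.7), ('hotel_b', 0.8)],
]))  # hotel_a ranks first: 0.9 * 0.7 > 0.4 * 0.8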
Example #8
    def filter(self, where=None, limit=None, get_col_map=False):
        try:
            # strip any 'FORMAT JSON' suffix, which moz_sql_parser cannot parse
            parsed_query = moz_sql_parser.parse(
                self._query.replace('FORMAT JSON', ''))

            modified_columns = []
            for col, op, value in where or []:
                past_where_clause = parsed_query.get('where', {})

                op = op.lower()
                op_json = binary_ops.get(op, None)

                if op_json is None:
                    print(
                        f"Operator '{op}' not found in the sql parser operator list; using it anyway."
                    )
                    op_json = op

                if op == 'like':
                    value = '%' + value.strip('%') + '%'
                    if 'clickhouse' in self.name().lower():
                        col = f'toString({col})'
                    elif 'postgres' in self.name().lower():
                        col = f'{col}::text'
                    elif ('mariadb' in self.name().lower()
                          or 'mysql' in self.name().lower()
                          or 'mssql' in self.name().lower()):
                        col = f'CAST({col} AS TEXT)'

                modified_columns.append(col)

                where_clause = {op_json: [col, value]}

                if len(past_where_clause) > 0:
                    where_clause = {'and': [where_clause, past_where_clause]}

                parsed_query['where'] = where_clause

            if limit is not None:
                parsed_query['limit'] = limit

            query = moz_sql_parser.format(parsed_query)
            query = query.replace('"', "'")
            query = query.replace("'.'", ".")

            for col in modified_columns:
                if f"'{col}'" in query:
                    query = query.replace(f"'{col}'", col)

            df, col_map = self.query(query)
            df, col_map = unnest(df, col_map)
            if get_col_map:
                return df, col_map
            else:
                return df

        except Exception as e:
            print(traceback.format_exc())
            print('Failed to filter using SQL: ', e)
            return super().filter(where=where,
                                  limit=limit,
                                  get_col_map=get_col_map)
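binary_ops is not defined in this snippet; it evidently maps SQL operator spellings to the operator names moz_sql_parser uses in its JSON form (the same names the tests above use, e.g. 'eq', 'gt'). A plausible sketch; the exact table in the source may differ:

# Hypothetical mapping: SQL operator spelling -> moz_sql_parser JSON op name
binary_ops = {
    '=': 'eq',
    '!=': 'neq',
    '<>': 'neq',
    '>': 'gt',
    '>=': 'gte',
    '<': 'lt',
    '<=': 'lte',
    'like': 'like',
    'in': 'in',
}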
Example #9
def getPredIsNULL(sql, note='IS NULL'):
    # sql = sql.strip().replace('`', '').lower()
    # sql = sql.strip().lower()
    sql = sql.strip()

    if 'WHERE' in sql and 'HAVING' in sql:
        return ''

    if 'WHERE' not in sql and 'HAVING' not in sql and 'ON' not in sql:
        return sql

    subsql = sql
    subquery = False

    # parsing error now
    tokens = parser.parse(sql)

    for key, value in tokens.items():
        if key == 'from' and 'value' in value:
            subsql = parser.format(value['value'], ansi_quotes=False) + ';'
            subquery = True
            break

    if subquery:
        newsubsql = getPredIsNULL(subsql, note)
        # fix comparison and optimize
        if newsubsql.strip().lower() != subsql.strip().lower():
            newsubsql = ' (' + newsubsql.replace(';', '').strip() + ') AS t'
            # print('newsub: ', newsubsql)
            import re
            newsql = re.sub(r'\s*\(SELECT .*\) AS [a-zA-Z_`]*', newsubsql, sql)
            # print('sql: ', sql)
            # print('new: ', newsql)
            newsql = newsql.replace('= TRUE', 'is true').replace(
                '= FALSE', 'is false')
            return newsql
        else:
            subsql = sql

    # fix, drop limit, not effective
    select = subsql.replace(';', '').split('LIMIT')[0]

    if 'HAVING' in select:
        parsed = select.split('HAVING')
        pred = 'HAVING'
    elif 'WHERE' in select:
        parsed = select.split('WHERE')
        pred = 'WHERE'
    elif 'ON' in select:
        parsed = select.split('ON', 1)
        pred = 'ON'
    else:
        return subsql

    head = parsed[0].strip() + ';'

    if 'ORDER BY' in select:
        where = parsed[1].split('ORDER BY')[0][:-1].strip()
    else:
        where = parsed[1].strip()

    # if not where.strip().startswith('(') or not where.strip().endswith(')'):
    #     where = '({})'.format(where)

    concated = getAddWhereToSelect(head, '( {} ) {}'.format(where, note), pred)
    # print(concated)
    concated = concated.replace('= TRUE', 'is true').replace(
        '= FALSE', 'is false')
    # print(concated)
    return concated
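The subquery handling above relies on how moz_sql_parser represents a derived table: the parsed 'from' entry is a dict holding the inner query under 'value' and the alias under 'name'. A small sketch, assuming parser is the moz_sql_parser module (the ansi_quotes keyword matches the call in the snippet):

import moz_sql_parser as parser

tokens = parser.parse("SELECT a FROM (SELECT a FROM t WHERE b IS NULL) AS d")
# The derived table appears as {'value': <inner query>, 'name': 'd'},
# with IS NULL parsed as {'missing': 'b'}
print(tokens['from'])

# format() renders the inner query back to SQL, which is the string
# getPredIsNULL recurses on
print(parser.format(tokens['from']['value'], ansi_quotes=False))
# SELECT a FROM t WHERE b IS NULL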
Example #10
    def parse_as_create_ai_table(self) -> dict:
        CREATE, AI, TABLE, VIEW, FROM, USING, AS = map(
            CaselessKeyword, "CREATE AI TABLE VIEW FROM USING AS".split())

        AI_TABLE = AI + TABLE

        word = Word(alphanums + "_")

        expr = (CREATE + (AI_TABLE | VIEW) + word('ai_table_name') + AS +
                originalTextFor(nestedExpr('(', ')'))('select'))

        r = expr.parseString(self._sql)
        r = r.asDict()

        if r['select'].startswith('(') and r['select'].endswith(')'):
            r['select'] = r['select'][1:-1]
        r['select'] = r['select'].strip(' \n')

        select = moz_sql.parse(r['select'])

        if 'from' not in select \
           or len(select['from']) != 2 \
           or 'join' not in select['from'][1]:
            raise Exception(
                "'from' must be like: 'from integration.table join predictor'")

        # add 'name' to each statement
        for s in [
                *select['select'], select['from'][0], select['from'][1]['join']
        ]:
            if 'name' not in s:
                if '.' in s['value']:
                    s['name'] = s['value'][s['value'].find('.') + 1:]
                else:
                    s['name'] = s['value']

        f = {
            'integration': select['from'][0],
            'predictor': select['from'][1]['join']
        }

        # remove predictor join
        select['from'].pop()

        new_select = []
        predictor_fields = []
        integration_prefix = f"{f['integration']['name']}."
        for s in select['select']:
            if s['value'].startswith(integration_prefix):
                s['value'] = s['value'][len(integration_prefix):]
                new_select.append(s)
            else:
                predictor_fields.append(s)

        predictor_prefix = f"{f['predictor']['name']}."
        for pf in predictor_fields:
            if pf['value'].startswith(predictor_prefix):
                pf['value'] = pf['value'][len(predictor_prefix):]

        integration_value = f['integration']['value']
        integration_name = integration_value[:integration_value.find('.')]
        f['integration']['value'] = integration_value[len(integration_name) + 1:]
        select['select'] = new_select
        integration_sql = moz_sql.format(select)

        res = {
            'ai_table_name': r['ai_table_name'],
            'integration_name': integration_name,
            'integration_query': integration_sql,
            'query_fields': select['select'],
            'predictor_name': f['predictor']['value'],
            'predictor_fields': predictor_fields
        }

        return res
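A standalone sketch of the grammar above (the statement text and names are illustrative; in the class, self._sql supplies the input):

from pyparsing import (CaselessKeyword, Word, alphanums, nestedExpr,
                       originalTextFor)

CREATE, AI, TABLE, VIEW, AS = map(
    CaselessKeyword, "CREATE AI TABLE VIEW AS".split())
word = Word(alphanums + "_")
expr = (CREATE + ((AI + TABLE) | VIEW) + word('ai_table_name') + AS +
        originalTextFor(nestedExpr('(', ')'))('select'))

r = expr.parseString(
    "CREATE AI TABLE forecast AS (SELECT db.t.a FROM db.t JOIN pred)").asDict()
print(r['ai_table_name'])  # forecast
print(r['select'])         # (SELECT db.t.a FROM db.t JOIN pred)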