Пример #1
0
    def test_query_with_where_clauses_defined_by_user_and_generic(self):
        options = {
            'url':
            'bolt://fdw-neo4j',
            'user':
            '******',
            'password':
            '******',
            'database':
            'testdb',
            'cypher':
            'MATCH (p:Person)-[:ACTED_IN]->(m:Movie) /*WHERE{"actor":"p.name"}*/ WITH p.name AS name, m.title AS title RETURN name AS actor, title AS movie'
        }
        columns = ['actor', 'movie']
        nfdw = neo4jfdw.Neo4jForeignDataWrapper(options, columns)

        quals = [
            Qual('movie', '=', 'The Matrix'),
            Qual('actor', '=', 'Keanu Reeves')
        ]
        query = nfdw.make_cypher(quals, columns, None)

        self.assertEqual(
            'MATCH (p:Person)-[:ACTED_IN]->(m:Movie)  WHERE p.name=$`actor` WITH p.name AS name, m.title AS title WITH name AS actor, title AS movie WHERE movie=$`movie` RETURN actor, movie',
            query)
Пример #2
0
    def test_like_operator(self):
        """ tests ~~ operator. """
        options = {
            'filename':
            os.path.join(TEST_FILES_DIR,
                         'rowid_int_col1_str_header_and_100_rows_gzipped.msg')
        }
        columns = ['rowid', 'col1']
        wrapper = PartitionMsgpackForeignDataWrapper(options, columns)

        quals = [Qual('col1', '~~', '1')]
        ret = list(wrapper.execute(quals, columns))
        self.assertEqual(len(ret), 1)
        self.assertEqual(ret[0], [1, '1'])

        quals = [Qual('col1', '~~', '%1')]
        ret = list(wrapper.execute(quals, columns))
        self.assertEqual(len(ret), 10)
        self.assertEqual(ret[0], [1, '1'])
        self.assertEqual(ret[1], [11, '11'])
        self.assertEqual(ret[-1], [91, '91'])

        quals = [Qual('col1', '~~', '1%')]
        ret = list(wrapper.execute(quals, columns))
        self.assertEqual(len(ret), 11)
        self.assertEqual(ret[0], [1, '1'])
        self.assertEqual(ret[1], [10, '10'])
        self.assertEqual(ret[-1], [19, '19'])

        quals = [Qual('col1', '~~', '_')]
        ret = list(wrapper.execute(quals, columns))
        self.assertEqual(len(ret), 10)
        self.assertEqual(ret[0], [0, '0'])
        self.assertEqual(ret[1], [1, '1'])
        self.assertEqual(ret[-1], [9, '9'])
Пример #3
0
    def generate_condition(self, qual, cypher_variable=None):
        """
        Build a neo4j condition from a qual
        """

        condition = ''

        if cypher_variable is not None:
            query_param_name = cypher_variable
        else:
            query_param_name = qual.field_name

        # quals is a list with ANY
        if qual.list_any_or_all == ANY:
            values = [
                '( %s )' % self.generate_condition(Qual(qual.field_name, qual.operator[0], value), '`' + unicode(query_param_name) + '`' + '[' + unicode(array_index) + ']')
                for array_index, value in enumerate(qual.value)
            ]
            condition = ' ( ' +  ' OR '.join(values) + ' ) '

        # quals is a list with ALL
        elif qual.list_any_or_all == ALL:
            conditions = [
                self.generate_condition(Qual(qual.field_name, qual.operator[0], value), '`' + unicode(query_param_name) + '`' + '[' + unicode(array_index) + ']')
                for array_index, value in enumerate(qual.value)
            ]

        # quals is just a string
        else:
            if qual.operator in ('~~', '!~~', '~~*', '!~~*'):
                # Convert to cypher regex
                regex = qual.value.replace('%', '.*')

                # For the negation, we prefix with NOT
                if qual.operator in ('!~~', '!~~*'):
                    condition += ' NOT '

                # Adding the variable name
                condition += qual.field_name + " =~ '"

                # If it's a ILIKE, we prefix regex with (?i)
                if qual.operator in ('~~*', '!~~*'):
                    condition += '(?i)'

                # We add the regex
                condition += regex + "' "

            else:
                if query_param_name.startswith('`'):
                    condition = qual.field_name +  qual.operator + "$" + unicode(query_param_name)
                else:
                    condition = qual.field_name +  qual.operator + "$`" + unicode(query_param_name) + '`'

        log_to_postgres('Condition is : ' + unicode(condition), DEBUG)
        return condition
Пример #4
0
 def setUp(self):
     self._columns = {
         'f__o_o': ColumnDefinition('f__o_o', type_name='text'),
         'bar': ColumnDefinition('bar', type_name='int'),
         'baz': ColumnDefinition('baz', type_name='text[]'),
         'quux': ColumnDefinition('quux', type_name='int')
     }
     self._fdw = ESForeignDataWrapper(
         {
             'doc_type': 'foo_doc',
             'index': 'our_index'
         }, self._columns)
     self._quals = [Qual('f__o_o', '=', 'value'), Qual('bar', '>', 5)]
Пример #5
0
    def compute_columns_stat(self):
        result = list()

        session = self.driver.session()
        try:
            for column_name in self.columns:
                quals = [Qual(column_name, '=', 'WHATEVER')]
                query = 'EXPLAIN ' + self.make_cypher(quals, self.columns,
                                                      None)
                rs = session.run(query, {})
                explain_summary = rs.summary().plan[2]
                stats = explain_summary['EstimatedRows']

                log_to_postgres(
                    'Explain query for column ' + unicode(column_name) +
                    ' is : ' + unicode(query), DEBUG)
                log_to_postgres(
                    'Stat for column ' + unicode(column_name) + ' is : ' +
                    unicode(explain_summary['EstimatedRows']), DEBUG)

                result.append(((column_name, ), int(stats)))

        except CypherError:
            raise RuntimeError("Bad cypher query : " + query)
        finally:
            session.close()

        log_to_postgres('Columns stats are :' + unicode(result), DEBUG)
        return result
Пример #6
0
 def test_geom_predicate_missing(self):
   """
   fdw.WCS.execute missing geom predicate
   """
   options = {'url' : '', 'layer' : '', 'crs' : 'EPSG:4326', 'band' : '2'}
   columns = ['raster', 'geom']
   qual = Qual('xyz', '=', 'xyz')
   fdw = WCS(options, columns)
   self.assertRaises(MissingQueryPredicateError, fdw._get_predicates, [qual])
Пример #7
0
 def test_geom_predicate_B(self):
   """
   fdw.WCS._get_predicates A = geom
   """
   options = {'url' : '', 'layer' : '', 'crs' : 'EPSG:4326', 'band' : '2'}
   columns = ['raster', 'geom']
   qual = Qual('xyz', '=', 'geom')
   fdw = WCS(options, columns)
   value = fdw._get_predicates([qual])
   self.assertEquals(value, 'xyz')
Пример #8
0
 def test_no_connect(self):
   """
   fdw.WCS._get_raster bad connection
   """
   options = {'url' : 'http://non-existant-url.net', 'layer' : '', 'crs' : 'EPSG:4326', 'band' : '2'}
   columns = ['raster', 'geom']
   qual = Qual('010100000000000000000000000000000000000000', '=', 'geom')
   fdw = WCS(options, columns)
   result = fdw.execute([qual], columns)
   self.assertListEqual(result, [])
Пример #9
0
 def test_make_match_lists(self):
     quals = [
         Qual('foo', '=', 'bar'),
         Qual('quux', '<>', 'baz'),
         Qual('num', '<@', '[1, 10]'),
         Qual('a', ('=', True), ['x', 'y', 'z']),
         Qual('b', ('~~', True), ['d', 'e', 'f']),
         Qual('c', ('!~~', True), ['a%b', 'c_d', '_e%f%']),
         Qual('d', ('~~', False), ['g%', 'h_%', 'i%']),
         Qual('e', ('<>', False), [1, 2])
     ]
     ml, mnl = self._fdw._make_match_lists(quals)
     self.assertEqual(len(ml), 7)
     self.assertIn({'terms': {'a': ['x', 'y', 'z']}}, ml)
     self.assertIn(
         {
             'or': [{
                 'regexp': {
                     'b': 'd'
                 }
             }, {
                 'regexp': {
                     'b': 'e'
                 }
             }, {
                 'regexp': {
                     'b': 'f'
                 }
             }]
         }, ml)
     self.assertIn({'prefix': {'d': 'g'}}, ml)
     self.assertIn({'regexp': {'d': 'h..*'}}, ml)
     self.assertIn({'prefix': {'d': 'i'}}, ml)
     self.assertEqual(len(mnl), 3)
     self.assertIn(
         {
             'and': [{
                 'regexp': {
                     'c': 'a.*b'
                 }
             }, {
                 'regexp': {
                     'c': 'c.d'
                 }
             }, {
                 'regexp': {
                     'c': '.e.*f.*'
                 }
             }]
         }, mnl)
     self.assertIn({'terms': {'e': [1, 2]}}, mnl)
Пример #10
0
 def test_equals_operator(self):
     """ tests = operator. """
     options = {
         'filename':
         os.path.join(TEST_FILES_DIR,
                      'rowid_int_col1_str_header_and_100_rows_gzipped.msg')
     }
     columns = ['rowid', 'col1']
     wrapper = PartitionMsgpackForeignDataWrapper(options, columns)
     quals = [Qual('col1', '=', '3')]
     ret = list(wrapper.execute(quals, columns))
     self.assertEqual(len(ret), 1)
     self.assertEqual(ret[0], [3, '3'])
Пример #11
0
 def test_greater_than_or_equal_operator(self):
     """ tests >= operator. """
     options = {
         'filename':
         os.path.join(TEST_FILES_DIR,
                      'rowid_int_col1_int_header_and_100_rows_gzipped.msg')
     }
     columns = ['rowid', 'col1']
     wrapper = PartitionMsgpackForeignDataWrapper(options, columns)
     quals = [Qual('col1', '>=', 98)]
     ret = list(wrapper.execute(quals, columns))
     self.assertEqual(len(ret), 2)
     self.assertEqual(ret[0], [98, 98])
     self.assertEqual(ret[1], [99, 99])
Пример #12
0
 def test_greater_than_operator(self):
     options = {
         'filename':
         os.path.join(TEST_FILES_DIR,
                      'rowid_int_col1_int_header_and_100_rows_gzipped.msg')
     }
     columns = ['rowid', 'col1']
     wrapper = PartitionMsgpackForeignDataWrapper(options, columns)
     quals = [Qual('col1', '>', 10)]
     ret = list(wrapper.execute(quals, columns))
     self.assertEqual(len(ret), 89)
     self.assertEqual(ret[0], [11, 11])
     self.assertEqual(ret[1], [12, 12])
     self.assertEqual(ret[2], [13, 13])
Пример #13
0
    def test_logs_unknown_qual(self, fake_log):
        fake_log\
            .expects_call()\
            .with_args(arg.contains('Unknown operator'), WARNING, hint=arg.contains('Implement'))

        options = {
            'filename':
            os.path.join(TEST_FILES_DIR,
                         'rowid_int_col1_str_header_and_100_rows_gzipped.msg')
        }
        columns = ['rowid', 'col1']
        wrapper = PartitionMsgpackForeignDataWrapper(options, columns)
        quals = [Qual('col1', '?', '3')]
        list(wrapper.execute(quals, columns))
Пример #14
0
 def test_less_than_operator(self):
     """ tests < operator. """
     options = {
         'filename':
         os.path.join(TEST_FILES_DIR,
                      'rowid_int_col1_int_header_and_100_rows_gzipped.msg')
     }
     columns = ['rowid', 'col1']
     wrapper = PartitionMsgpackForeignDataWrapper(options, columns)
     quals = [Qual('col1', '<', 3)]
     ret = list(wrapper.execute(quals, columns))
     self.assertEqual(len(ret), 3)
     self.assertEqual(ret[0], [0, 0])
     self.assertEqual(ret[1], [1, 1])
     self.assertEqual(ret[2], [2, 2])
Пример #15
0
    def test_movie_by_title(self):
        options = {
            'url': 'bolt://fdw-neo4j',
            'user': '******',
            'password': '******',
            'cypher': 'MATCH (n:Movie) RETURN n.title as movie'
        }
        columns = ['movie']
        nfdw = neo4jfdw.Neo4jForeignDataWrapper(options, columns)

        quals = [Qual('movie', '=', 'The Matrix')]
        query = nfdw.make_cypher(quals, columns, None)
        self.assertEqual(
            'MATCH (n:Movie) WITH n.title as movie WHERE movie=$`movie` RETURN movie',
            query)
Пример #16
0
    def test_execute_column_name_translation(self, scan_mock,
                                             _elasticsearch_mock):
        columns = {
            'object__nested_field':
            ColumnDefinition('object__nested_field', type_name='text')
        }
        fdw = ESForeignDataWrapper(
            {
                'doc_type': 'foo_doc',
                'index': 'our_index',
                'column_name_translation': 'true'
            }, columns)
        quals = [Qual('object__nested_field', '=', 'value')]
        scan_mock.return_value = [{
            'fields': {
                'object.nested-field': ['value']
            }
        }]
        rows = list(fdw.execute(quals, ['object__nested_field']))

        expected_query = {
            'fields': ['object.nested-field'],
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [{
                                'term': {
                                    'object.nested-field': 'value'
                                }
                            }]
                        }
                    }
                }
            }
        }
        scan_mock.assert_called_once_with(fdw.esclient,
                                          query=expected_query,
                                          index='our_index',
                                          doc_type='foo_doc',
                                          size=fdw._SCROLL_SIZE,
                                          scroll=fdw._SCROLL_LENGTH)

        expected_rows = [{'object__nested_field': 'value'}]
        self.assertEqual(rows, expected_rows)
Пример #17
0
 def test_create_cond_multi_col(self):
     quals = [Qual('name', '=', 'alice'), Qual('age', '<', 42)]
     self.assertEqual(self.td_fdw.create_cond(quals),
                      "(name = 'alice') AND (age < 42)")
     quals = [Qual('name', '=', None), Qual('age', ('=', True), [42, 99])]
     self.assertEqual(self.td_fdw.create_cond(quals),
                      "(name IS NULL) AND (age = 42 OR age = 99)")
     quals = [
         Qual('name', ('~~', False), ['%alice%', '%bob%']),
         Qual('age', '<>', None)
     ]
     self.assertEqual(
         self.td_fdw.create_cond(quals),
         "(name LIKE '%alice%' AND name LIKE '%bob%') AND (age IS NOT NULL)"
     )
     quals = [
         Qual('name', ('!~~', False), ['%alice%', '%bob%']),
         Qual('age', '<>', None)
     ]
     self.assertEqual(
         self.td_fdw.create_cond(quals),
         "(name NOT LIKE '%alice%' AND name NOT LIKE '%bob%') AND (age IS NOT NULL)"
     )
Пример #18
0
    def compute_columns_stat(self):
        result = list()

        # Execute & retrieve neo4j data
        if self.driver.supports_multi_db():
            session = self.driver.session(database=self.database)
        else:
            session = self.driver.session()

        try:
            for column_name in self.columns:
                quals = [Qual(column_name, '=', 'WHATEVER')]
                query = 'EXPLAIN ' + self.make_cypher(quals, self.columns,
                                                      None)
                rs = session.run(query, {})
                explain_summary = rs.consume().plan['args']
                stats = explain_summary['EstimatedRows']

                log_to_postgres(
                    'Explain query for column ' + str(column_name) + ' is : ' +
                    str(query), DEBUG)
                log_to_postgres(
                    'Stat for column ' + str(column_name) + ' is : ' +
                    str(explain_summary['EstimatedRows']), DEBUG)

                result.append(((column_name, ), int(stats)))

        except CypherSyntaxError:
            raise RuntimeError("Bad cypher query : " + query)
        except CypherTypeError:
            raise RuntimeError("Bad cypher type in query : " + query)
        finally:
            session.close()

        log_to_postgres('Columns stats are :' + str(result), DEBUG)
        return result
Пример #19
0
 def test_create_cond_string(self):
     quals = [Qual('name', '=', 'alice')]
     self.assertEqual(self.td_fdw.create_cond(quals), "(name = 'alice')")
     quals = [Qual('name', '!=', 'alice')]
     self.assertEqual(self.td_fdw.create_cond(quals), "(name != 'alice')")
     quals = [Qual('name', '<>', 'alice')]
     self.assertEqual(self.td_fdw.create_cond(quals), "(name <> 'alice')")
     quals = [Qual('name', '>', 'alice')]
     self.assertEqual(self.td_fdw.create_cond(quals), "(name > 'alice')")
     quals = [Qual('name', '>=', 'alice')]
     self.assertEqual(self.td_fdw.create_cond(quals), "(name >= 'alice')")
     quals = [Qual('name', '<', 'alice')]
     self.assertEqual(self.td_fdw.create_cond(quals), "(name < 'alice')")
     quals = [Qual('name', '<=', 'alice')]
     self.assertEqual(self.td_fdw.create_cond(quals), "(name <= 'alice')")
     quals = [Qual('name', '~~', '%alice%')]
     self.assertEqual(self.td_fdw.create_cond(quals),
                      "(name LIKE '%alice%')")
     quals = [Qual('name', '!~~', '%alice%')]
     self.assertEqual(self.td_fdw.create_cond(quals),
                      "(name NOT LIKE '%alice%')")
     quals = [Qual('name', ('=', True), ['alice', 'bob'])]
     self.assertEqual(self.td_fdw.create_cond(quals),
                      "(name = 'alice' OR name = 'bob')")
     quals = [Qual('name', ('<', False), ['alice', 'bob'])]
     self.assertEqual(self.td_fdw.create_cond(quals),
                      "(name < 'alice' AND name < 'bob')")
     quals = [Qual('name', '=', None)]
     self.assertEqual(self.td_fdw.create_cond(quals), "(name IS NULL)")
     quals = [Qual('name', '<>', None)]
     self.assertEqual(self.td_fdw.create_cond(quals), "(name IS NOT NULL)")
     quals = [Qual('name', '=', "al'ice")]
     self.assertEqual(self.td_fdw.create_cond(quals), "(name = 'al''ice')")
Пример #20
0
    def make_cypher(self, quals, columns, sortkeys):
        """
        Override cypher query to add search criteria
        """
        query = self.cypher
        log_to_postgres('Init cypher query is : ' + unicode(query), DEBUG)
        log_to_postgres('Quals are : ' + unicode(quals), DEBUG)

        needUpdateProjection = True

        if (quals is not None and len(quals) > 0):

            # Step 1 : we check if there is some `where` annotation in the query
            where_match = re.findall('/\*WHERE([^}]*})\*/', self.cypher)
            if (where_match is not None):
                for group in where_match:
                    log_to_postgres(
                        'Find a custom WHERE clause : ' + unicode(group),
                        DEBUG)
                    group_where_condition = []

                    # parse the JSON and check if field are in the where clause
                    # if so we replace it and remove the clause from the where
                    where_config = json.loads(group)
                    for field in where_config:
                        fieldQuals = filter(
                            lambda qual: qual.field_name == field, quals)
                        if (len(fieldQuals) > 0):
                            for qual in fieldQuals:
                                log_to_postgres(
                                    'Find a field for this custom WHERE clause : '
                                    + unicode(qual), DEBUG)
                                # Generate the condition
                                customQual = Qual(
                                    where_config[qual.field_name],
                                    qual.operator, qual.value)
                                group_where_condition.append(
                                    self.generate_condition(
                                        customQual, qual.field_name))
                                # Remove the qual from the initial qual list
                                quals = filter(
                                    lambda qual: not qual.field_name == field,
                                    quals)

                    # replace the captured group by the condition
                    if (len(group_where_condition) > 0):
                        query = query.replace(
                            '/*WHERE' + group + '*/',
                            ' WHERE ' + ' AND '.join(group_where_condition))
                    else:
                        query = query.replace('/*WHERE' + group + '*/', '')
                    log_to_postgres(
                        'Current cypher query is : ' + unicode(query), DEBUG)

            # Step 2 : if there is still some where clause, we replace the return by a with/where/return
            if (len(quals) > 0):
                log_to_postgres('Generic where clause', DEBUG)
                where_clauses = self.generate_where_conditions(quals)
                pattern = re.compile('(.*)RETURN(.*)',
                                     re.IGNORECASE | re.MULTILINE | re.DOTALL)
                match = pattern.match(query)
                query = match.group(1) + "WITH" + match.group(
                    2) + " WHERE " + ' AND '.join(
                        where_clauses) + " RETURN " + ', '.join(columns)
                log_to_postgres(
                    'Current cypher query after generic where is : ' +
                    unicode(query), DEBUG)
                needUpdateProjection = False

        # Step 3 : We construct the projection for the return
        # we only modify the projection if it's needed (ie. we don't return all the tables columns)
        if (needUpdateProjection and len(columns) < len(self.columns)):
            return_pattern = re.compile(
                '(.*)RETURN(.*)', re.IGNORECASE | re.MULTILINE | re.DOTALL)
            return_match = return_pattern.match(query)
            query = return_match.group(1) + "WITH" + return_match.group(
                2) + " RETURN " + ', '.join(columns)

        # Step 4 : We add the order clause at the end of the query
        if sortkeys is not None:
            orders = []
            for sortkey in sortkeys:
                if sortkey.is_reversed:
                    orders.append(sortkey.attname + ' DESC')
                else:
                    orders.append(sortkey.attname)
            query = query + ' ORDER BY ' + ', '.join(orders)
            log_to_postgres(
                'Current cypher query after sort is : ' + unicode(query),
                DEBUG)

        return query
Пример #21
0
 def test_create_cond_int(self):
     quals = [Qual('age', '=', 42)]
     self.assertEqual(self.td_fdw.create_cond(quals), "(age = 42)")
     quals = [Qual('age', '!=', 42)]
     self.assertEqual(self.td_fdw.create_cond(quals), "(age != 42)")
     quals = [Qual('age', '<>', 42)]
     self.assertEqual(self.td_fdw.create_cond(quals), "(age <> 42)")
     quals = [Qual('age', '>', 42)]
     self.assertEqual(self.td_fdw.create_cond(quals), "(age > 42)")
     quals = [Qual('age', '>=', 42)]
     self.assertEqual(self.td_fdw.create_cond(quals), "(age >= 42)")
     quals = [Qual('age', '<', 42)]
     self.assertEqual(self.td_fdw.create_cond(quals), "(age < 42)")
     quals = [Qual('age', '<=', 42)]
     self.assertEqual(self.td_fdw.create_cond(quals), "(age <= 42)")
     quals = [Qual('age', ('=', True), [42, 99])]
     self.assertEqual(self.td_fdw.create_cond(quals),
                      "(age = 42 OR age = 99)")
     quals = [Qual('age', ('<', False), [42, 99])]
     self.assertEqual(self.td_fdw.create_cond(quals),
                      "(age < 42 AND age < 99)")
     quals = [Qual('age', '=', None)]
     self.assertEqual(self.td_fdw.create_cond(quals), "(age IS NULL)")
     quals = [Qual('age', '<>', None)]
     self.assertEqual(self.td_fdw.create_cond(quals), "(age IS NOT NULL)")