Пример #1
0
    def test_marker(self) -> None:
        """Check which value ``init`` stores in ``task.marker``.

        Two configurations are exercised under a patched Neo4j driver:

        * publish tag only: ``ms_to_expire`` must be ``None`` and the marker
          must equal the tag (``'foo'``);
        * ``MS_TO_EXPIRE`` set: ``ms_to_expire`` must not be ``None`` and the
          marker must equal the configured millisecond value.
        """
        with patch.object(GraphDatabase, 'driver'):
            # Case 1: only the publish tag is configured.
            task = Neo4jStalenessRemovalTask()
            scope = task.get_scope()
            tag_only_settings = {
                'job.identifier': 'remove_stale_data_job',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_END_POINT_KEY}': 'foobar',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_USER}': 'foo',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_PASSWORD}': 'bar',
                f'{scope}.{neo4j_staleness_removal_task.STALENESS_MAX_PCT}': 5,
                neo4j_csv_publisher.JOB_PUBLISH_TAG: 'foo',
            }
            task.init(ConfigFactory.from_dict(tag_only_settings))

            self.assertIsNone(task.ms_to_expire)
            self.assertEqual(task.marker, 'foo')

            # Case 2: an expiry window is configured instead of a tag.
            task = Neo4jStalenessRemovalTask()
            scope = task.get_scope()
            expiry_settings = {
                'job.identifier': 'remove_stale_data_job',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_END_POINT_KEY}': 'foobar',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_USER}': 'foo',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_PASSWORD}': 'bar',
                f'{scope}.{neo4j_staleness_removal_task.STALENESS_MAX_PCT}': 5,
                f'{scope}.{neo4j_staleness_removal_task.MS_TO_EXPIRE}': 86400000,
            }
            task.init(ConfigFactory.from_dict(expiry_settings))

            self.assertIsNotNone(task.ms_to_expire)
            self.assertEqual(task.marker, 86400000)
Пример #2
0
    def test_ms_to_expire_too_small(self) -> None:
        """Check that ``init`` rejects a too-small ``MS_TO_EXPIRE``.

        First half: ``init`` must raise for ``24 * 60 * 60 * 100 - 10`` ms.
        Second half: ``init`` must succeed for exactly ``24 * 60 * 60 * 1000``
        ms (one day).
        """
        with patch.object(GraphDatabase, 'driver'):
            task = Neo4jStalenessRemovalTask()
            scope = task.get_scope()
            job_config = ConfigFactory.from_dict({
                'job.identifier': 'remove_stale_data_job',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_END_POINT_KEY}': 'foobar',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_USER}': 'foo',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_PASSWORD}': 'bar',
                f'{scope}.{neo4j_staleness_removal_task.STALENESS_MAX_PCT}': 5,
                f'{scope}.{neo4j_staleness_removal_task.TARGET_NODES}': ['Foo'],
                f'{scope}.{neo4j_staleness_removal_task.TARGET_RELATIONS}': ['BAR'],
                # NOTE(review): the factor is 100, not 1000, so this is roughly
                # a tenth of a day rather than "one day minus 10 ms" -- confirm
                # whether a tighter boundary value was intended.
                f'{scope}.{neo4j_staleness_removal_task.MS_TO_EXPIRE}': 24 * 60 * 60 * 100 - 10,
            })

            # assertRaises is required here. The previous form called
            # assertTrue(False, ...) inside ``try`` / ``except Exception: pass``;
            # AssertionError is itself an Exception, so the failure was
            # swallowed and the test passed even when init() did not raise.
            with self.assertRaises(Exception, msg='Should have failed with small TTL   '):
                task.init(job_config)

        with patch.object(GraphDatabase, 'driver'):
            task = Neo4jStalenessRemovalTask()
            scope = task.get_scope()
            job_config = ConfigFactory.from_dict({
                'job.identifier': 'remove_stale_data_job',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_END_POINT_KEY}': 'foobar',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_USER}': 'foo',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_PASSWORD}': 'bar',
                f'{scope}.{neo4j_staleness_removal_task.STALENESS_MAX_PCT}': 5,
                f'{scope}.{neo4j_staleness_removal_task.TARGET_NODES}': ['Foo'],
                f'{scope}.{neo4j_staleness_removal_task.TARGET_RELATIONS}': ['BAR'],
                f'{scope}.{neo4j_staleness_removal_task.MS_TO_EXPIRE}': 24 * 60 * 60 * 1000,
            })
            # Exactly one day must be accepted: any exception fails the test.
            task.init(job_config)
Пример #3
0
    def test_validation_receives_correct_counts(self) -> None:
        """Check the counts handed from the query layer to the validator.

        ``_execute_cypher_query`` is mocked to yield ``[{'count': 100}]`` and
        then ``[{'count': 50}]``. Both the node and the relation validation
        must then call ``_validate_staleness_pct`` with a total of 100, a
        stale count of 50 and the configured target type.
        """
        with patch.object(GraphDatabase, 'driver'), \
                patch.object(Neo4jStalenessRemovalTask, '_execute_cypher_query') as mock_execute:
            task = Neo4jStalenessRemovalTask()
            scope = task.get_scope()
            settings = {
                'job.identifier': 'remove_stale_data_job',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_END_POINT_KEY}': 'foobar',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_USER}': 'foo',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_PASSWORD}': 'bar',
                f'{scope}.{neo4j_staleness_removal_task.STALENESS_MAX_PCT}': 5,
                f'{scope}.{neo4j_staleness_removal_task.TARGET_NODES}': ['Foo'],
                f'{scope}.{neo4j_staleness_removal_task.TARGET_RELATIONS}': ['BAR'],
                neo4j_csv_publisher.JOB_PUBLISH_TAG: 'foo',
            }
            task.init(ConfigFactory.from_dict(settings))

            with patch.object(Neo4jStalenessRemovalTask, '_validate_staleness_pct') as mock_validate:
                # (validation entry point, target type it must report)
                scenarios = (
                    (task._validate_node_staleness_pct, 'Foo'),
                    (task._validate_relation_staleness_pct, 'BAR'),
                )
                for run_validation, expected_type in scenarios:
                    # Re-arm the mock: the first query result is the total,
                    # the second one the stale count.
                    mock_execute.side_effect = [[{'count': 100}], [{'count': 50}]]
                    run_validation()
                    mock_validate.assert_called_with(total_record_count=100,
                                                     stale_record_count=50,
                                                     target_type=expected_type)
    def test_validation_failure(self) -> None:
        """Run ``_validate_staleness_pct`` with 50 of 100 'foo' records stale.

        ``STALENESS_MAX_PCT`` is configured as 90 and no exception is asserted,
        so despite its name this test only passes when the validation call
        returns without raising.
        """
        with patch.object(GraphDatabase, 'driver'):
            task = Neo4jStalenessRemovalTask()
            scope = task.get_scope()
            settings = {
                'job.identifier': 'remove_stale_data_job',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_END_POINT_KEY}': 'foobar',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_USER}': 'foo',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_PASSWORD}': 'bar',
                f'{scope}.{neo4j_staleness_removal_task.STALENESS_MAX_PCT}': 90,
                neo4j_csv_publisher.JOB_PUBLISH_TAG: 'foo',
            }
            task.init(ConfigFactory.from_dict(settings))

            task._validate_staleness_pct(
                total_records=[{'type': 'foo', 'count': 100}],
                stale_records=[{'type': 'foo', 'count': 50}],
                types={'foo'},
            )
Пример #5
0
    def test_validation_statement_ms_to_expire_retain_data_with_no_publisher_metadata(self) -> None:
        """Check the validation queries when data without metadata is retained.

        With ``MS_TO_EXPIRE`` and ``RETAIN_DATA_WITH_NO_PUBLISHER_METADATA``
        both set, the node and relation validation statements must filter on
        ``publisher_last_updated_epoch_ms`` only; the expected statements
        contain no ``NOT EXISTS`` clause.
        """
        with patch.object(GraphDatabase, 'driver'), \
                patch.object(Neo4jStalenessRemovalTask, '_execute_cypher_query') as mock_execute:
            task = Neo4jStalenessRemovalTask()
            scope = task.get_scope()
            settings = {
                'job.identifier': 'remove_stale_data_job',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_END_POINT_KEY}': 'foobar',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_USER}': 'foo',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_PASSWORD}': 'bar',
                f'{scope}.{neo4j_staleness_removal_task.STALENESS_MAX_PCT}': 5,
                f'{scope}.{neo4j_staleness_removal_task.TARGET_NODES}': ['Foo'],
                f'{scope}.{neo4j_staleness_removal_task.TARGET_RELATIONS}': ['BAR'],
                f'{scope}.{neo4j_staleness_removal_task.MS_TO_EXPIRE}': 9876543210,
                f'{scope}.{neo4j_staleness_removal_task.RETAIN_DATA_WITH_NO_PUBLISHER_METADATA}': True,
            }
            task.init(ConfigFactory.from_dict(settings))

            expected_node_statement = textwrap.dedent("""
            MATCH (target:Foo)
            WHERE (target.publisher_last_updated_epoch_ms < (timestamp() - $marker))
            RETURN count(*) as count
            """)
            expected_relation_statement = textwrap.dedent("""
            MATCH (start_node)-[target:BAR]-(end_node)
            WHERE (target.publisher_last_updated_epoch_ms < (timestamp() - $marker))
            RETURN count(*) as count
            """)

            task._validate_node_staleness_pct()
            mock_execute.assert_any_call(param_dict={'marker': 9876543210},
                                         statement=expected_node_statement)

            task._validate_relation_staleness_pct()
            mock_execute.assert_any_call(param_dict={'marker': 9876543210},
                                         statement=expected_relation_statement)
    def test_delete_dry_run(self) -> None:
        """Check that a dry run never opens a driver session.

        With ``DRY_RUN`` set to ``True``, running both delete steps must leave
        the ``session`` attribute of the mocked driver instance uncalled.
        """
        with patch.object(GraphDatabase, 'driver') as mock_driver:
            # ``session`` factory of the driver instance the task will receive.
            session_mock = mock_driver.return_value.session

            task = Neo4jStalenessRemovalTask()
            scope = task.get_scope()
            settings = {
                'job.identifier': 'remove_stale_data_job',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_END_POINT_KEY}': 'foobar',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_USER}': 'foo',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_PASSWORD}': 'bar',
                f'{scope}.{neo4j_staleness_removal_task.STALENESS_MAX_PCT}': 5,
                f'{scope}.{neo4j_staleness_removal_task.TARGET_NODES}': ['Foo'],
                f'{scope}.{neo4j_staleness_removal_task.TARGET_RELATIONS}': ['BAR'],
                f'{scope}.{neo4j_staleness_removal_task.DRY_RUN}': True,
                neo4j_csv_publisher.JOB_PUBLISH_TAG: 'foo',
            }
            task.init(ConfigFactory.from_dict(settings))

            task._delete_stale_nodes()
            task._delete_stale_relations()

            session_mock.assert_not_called()
Пример #7
0
    def test_delete_statement_ms_to_expire(self) -> None:
        """Check the delete statements issued when ``MS_TO_EXPIRE`` is set.

        After running both delete steps, ``_execute_cypher_query`` must have
        been called with ``dry_run=False``, a ``marker`` of
        ``'(timestamp() - 9876543210)'``, a ``batch_size`` of 100, and the
        node and relation statements spelled out below.
        """
        with patch.object(GraphDatabase, 'driver'), \
                patch.object(Neo4jStalenessRemovalTask, '_execute_cypher_query') as mock_execute:
            # Every mocked query reports a count of 0 through ``.single()``.
            mock_execute.return_value.single.return_value = {'count': 0}

            task = Neo4jStalenessRemovalTask()
            scope = task.get_scope()
            job_config = ConfigFactory.from_dict({
                'job.identifier': 'remove_stale_data_job',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_END_POINT_KEY}': 'foobar',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_USER}': 'foo',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_PASSWORD}': 'bar',
                f'{scope}.{neo4j_staleness_removal_task.STALENESS_MAX_PCT}': 5,
                f'{scope}.{neo4j_staleness_removal_task.TARGET_NODES}': ['Foo'],
                f'{scope}.{neo4j_staleness_removal_task.TARGET_RELATIONS}': ['BAR'],
                f'{scope}.{neo4j_staleness_removal_task.MS_TO_EXPIRE}': 9876543210,
            })

            task.init(job_config)
            task._delete_stale_nodes()
            task._delete_stale_relations()

            # The expected statements have a single space after WHERE. It is
            # injected through ``WHERE{}`` + ``.format(' ')`` so that the
            # assertion does not depend on invisible trailing whitespace in
            # the source file, which editors and linters tend to strip.
            mock_execute.assert_any_call(dry_run=False,
                                         param_dict={
                                             'marker': '(timestamp() - 9876543210)',
                                             'batch_size': 100
                                         },
                                         statement=textwrap.dedent("""
            MATCH (n:Foo)
            WHERE{}
            n.publisher_last_updated_epoch_ms < $marker
            OR NOT EXISTS(n.publisher_last_updated_epoch_ms)
            WITH n LIMIT $batch_size
            DETACH DELETE (n)
            RETURN COUNT(*) as count;
            """.format(' ')))

            mock_execute.assert_any_call(dry_run=False,
                                         param_dict={
                                             'marker': '(timestamp() - 9876543210)',
                                             'batch_size': 100
                                         },
                                         statement=textwrap.dedent("""
            MATCH ()-[n:BAR]-()
            WHERE{}
            n.publisher_last_updated_epoch_ms < $marker
            OR NOT EXISTS(n.publisher_last_updated_epoch_ms)
            WITH n LIMIT $batch_size
            DELETE n
            RETURN count(*) as count;
                        """.format(' ')))
    def test_validation(self) -> None:
        """Expect ``_validate_staleness_pct`` to raise when too much is stale.

        ``STALENESS_MAX_PCT`` is 5 while 50 of 100 'foo' records are reported
        stale; the call must raise an ``Exception``.
        """
        with patch.object(GraphDatabase, 'driver'):
            task = Neo4jStalenessRemovalTask()
            scope = task.get_scope()
            settings = {
                'job.identifier': 'remove_stale_data_job',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_END_POINT_KEY}': 'foobar',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_USER}': 'foo',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_PASSWORD}': 'bar',
                f'{scope}.{neo4j_staleness_removal_task.STALENESS_MAX_PCT}': 5,
                neo4j_csv_publisher.JOB_PUBLISH_TAG: 'foo',
            }
            task.init(ConfigFactory.from_dict(settings))

            all_counts = [{'type': 'foo', 'count': 100}]
            stale_counts = [{'type': 'foo', 'count': 50}]
            checked_types = {'foo'}
            # Arguments are passed positionally, exactly as before.
            with self.assertRaises(Exception):
                task._validate_staleness_pct(all_counts, stale_counts, checked_types)
    def test_delete_statement_publish_tag(self) -> None:
        """Check the delete statements issued when a publish tag is configured.

        After running both delete steps, ``_execute_cypher_query`` must have
        been called with ``dry_run=False``, a ``marker`` of ``'foo'``, a
        ``batch_size`` of 100, and statements that filter on
        ``published_tag``.
        """
        with patch.object(GraphDatabase, 'driver'), \
                patch.object(Neo4jStalenessRemovalTask, '_execute_cypher_query') as mock_execute:
            # Every mocked query reports a count of 0 through ``.single()``.
            mock_execute.return_value.single.return_value = {'count': 0}

            task = Neo4jStalenessRemovalTask()
            scope = task.get_scope()
            settings = {
                'job.identifier': 'remove_stale_data_job',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_END_POINT_KEY}': 'foobar',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_USER}': 'foo',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_PASSWORD}': 'bar',
                f'{scope}.{neo4j_staleness_removal_task.STALENESS_MAX_PCT}': 5,
                f'{scope}.{neo4j_staleness_removal_task.TARGET_NODES}': ['Foo'],
                f'{scope}.{neo4j_staleness_removal_task.TARGET_RELATIONS}': ['BAR'],
                neo4j_csv_publisher.JOB_PUBLISH_TAG: 'foo',
            }
            task.init(ConfigFactory.from_dict(settings))

            task._delete_stale_nodes()
            task._delete_stale_relations()

            # ``WHERE{}`` + ``.format(' ')`` yields "WHERE " with one trailing
            # space without leaving trailing whitespace in this file.
            expected_node_statement = textwrap.dedent("""
            MATCH (n:Foo)
            WHERE{}
            n.published_tag <> $marker
            OR NOT EXISTS(n.published_tag)
            WITH n LIMIT $batch_size
            DETACH DELETE (n)
            RETURN COUNT(*) as count;
            """.format(' '))
            expected_relation_statement = textwrap.dedent("""
            MATCH ()-[n:BAR]-()
            WHERE{}
            n.published_tag <> $marker
            OR NOT EXISTS(n.published_tag)
            WITH n LIMIT $batch_size
            DELETE n
            RETURN count(*) as count;
                        """.format(' '))

            for expected_statement in (expected_node_statement, expected_relation_statement):
                mock_execute.assert_any_call(dry_run=False,
                                             param_dict={'marker': u'foo', 'batch_size': 100},
                                             statement=expected_statement)
    def test_validation_statement_publish_tag(self) -> None:
        """Check the validation queries issued when a publish tag is configured.

        Node validation must issue an unfiltered per-label count and a
        per-label count filtered on ``published_tag``; relation validation
        must issue a per-type count filtered on ``published_tag``. The
        filtered queries carry ``{'marker': 'foo'}`` as parameters.
        """
        with patch.object(GraphDatabase, 'driver'), \
                patch.object(Neo4jStalenessRemovalTask, '_execute_cypher_query') as mock_execute:
            task = Neo4jStalenessRemovalTask()
            scope = task.get_scope()
            settings = {
                'job.identifier': 'remove_stale_data_job',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_END_POINT_KEY}': 'foobar',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_USER}': 'foo',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_PASSWORD}': 'bar',
                f'{scope}.{neo4j_staleness_removal_task.STALENESS_MAX_PCT}': 5,
                f'{scope}.{neo4j_staleness_removal_task.TARGET_NODES}': ['Foo'],
                neo4j_csv_publisher.JOB_PUBLISH_TAG: 'foo',
            }
            task.init(ConfigFactory.from_dict(settings))

            total_nodes_statement = textwrap.dedent("""
            MATCH (n)
            WITH DISTINCT labels(n) as node, count(*) as count
            RETURN head(node) as type, count
            """)
            # ``WHERE{}`` + ``.format(' ')`` yields "WHERE " with one trailing
            # space without leaving trailing whitespace in this file.
            stale_nodes_statement = textwrap.dedent("""
            MATCH (n)
            WHERE{}
            n.published_tag <> $marker
            OR NOT EXISTS(n.published_tag)
            WITH DISTINCT labels(n) as node, count(*) as count
            RETURN head(node) as type, count
            """.format(' '))
            stale_relations_statement = textwrap.dedent("""
            MATCH ()-[n]-()
            WHERE{}
            n.published_tag <> $marker
            OR NOT EXISTS(n.published_tag)
            RETURN type(n) as type, count(*) as count
            """.format(' '))

            task._validate_node_staleness_pct()
            mock_execute.assert_called()
            mock_execute.assert_any_call(statement=total_nodes_statement)
            mock_execute.assert_any_call(param_dict={'marker': u'foo'},
                                         statement=stale_nodes_statement)

            task._validate_relation_staleness_pct()
            mock_execute.assert_any_call(param_dict={'marker': u'foo'},
                                         statement=stale_relations_statement)
Пример #11
0
    def test_validation_statement_ms_to_expire(self) -> None:
        """Check the validation queries issued when ``MS_TO_EXPIRE`` is set.

        Node validation must issue an unfiltered per-label count and a
        per-label count filtered on ``publisher_last_updated_epoch_ms``;
        relation validation must issue a per-type count with the same filter.
        The filtered queries carry a ``marker`` of
        ``'(timestamp() - 9876543210)'``.
        """
        with patch.object(GraphDatabase, 'driver'), \
                patch.object(Neo4jStalenessRemovalTask, '_execute_cypher_query') as mock_execute:
            task = Neo4jStalenessRemovalTask()
            scope = task.get_scope()
            job_config = ConfigFactory.from_dict({
                'job.identifier': 'remove_stale_data_job',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_END_POINT_KEY}': 'foobar',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_USER}': 'foo',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_PASSWORD}': 'bar',
                f'{scope}.{neo4j_staleness_removal_task.STALENESS_MAX_PCT}': 5,
                f'{scope}.{neo4j_staleness_removal_task.MS_TO_EXPIRE}': 9876543210,
            })

            task.init(job_config)
            task._validate_node_staleness_pct()

            mock_execute.assert_called()
            mock_execute.assert_any_call(statement=textwrap.dedent("""
            MATCH (n)
            WITH DISTINCT labels(n) as node, count(*) as count
            RETURN head(node) as type, count
            """))

            # The expected statements have a single space after WHERE. It is
            # injected through ``WHERE{}`` + ``.format(' ')`` so that the
            # assertion does not depend on invisible trailing whitespace in
            # the source file, which editors and linters tend to strip.
            mock_execute.assert_any_call(
                param_dict={'marker': '(timestamp() - 9876543210)'},
                statement=textwrap.dedent("""
            MATCH (n)
            WHERE{}
            n.publisher_last_updated_epoch_ms < $marker
            OR NOT EXISTS(n.publisher_last_updated_epoch_ms)
            WITH DISTINCT labels(n) as node, count(*) as count
            RETURN head(node) as type, count
            """.format(' ')))

            task._validate_relation_staleness_pct()
            mock_execute.assert_any_call(
                param_dict={'marker': '(timestamp() - 9876543210)'},
                statement=textwrap.dedent("""
            MATCH ()-[n]-()
            WHERE{}
            n.publisher_last_updated_epoch_ms < $marker
            OR NOT EXISTS(n.publisher_last_updated_epoch_ms)
            RETURN type(n) as type, count(*) as count
            """.format(' ')))
Пример #12
0
def remove_stale_data():
    """Build the job that removes stale ``Table`` and ``Column`` nodes.

    The Neo4j endpoint, user and password come from the ``neo4j_endpoint``,
    ``neo4j_user`` and ``neo4j_password`` names of the enclosing module scope.

    Returns:
        A ``DefaultJob`` wrapping a ``Neo4jStalenessRemovalTask``. The job is
        only constructed here; the caller decides when to run it.
    """
    staleness_task = Neo4jStalenessRemovalTask()

    settings = ConfigFactory.from_dict({
        'job.identifier': 'remove_stale_data_job',
        'task.remove_stale_data.neo4j_endpoint': neo4j_endpoint,
        'task.remove_stale_data.neo4j_user': neo4j_user,
        'task.remove_stale_data.neo4j_password': neo4j_password,
        # NOTE(review): 110 is above 100 percent, which presumably disables
        # the staleness safety check altogether -- confirm this is intended.
        'task.remove_stale_data.staleness_max_pct': 110,
        'task.remove_stale_data.target_nodes': ['Table', 'Column'],
        'task.remove_stale_data.job_publish_tag': 'idk',
    })

    return DefaultJob(conf=settings, task=staleness_task)
Пример #13
0
    def test_validation_threshold_override(self):
        # type: () -> None
        """Run the validation with a per-type threshold override for 'foo'.

        ``STALENESS_MAX_PCT`` is 5, while ``STALENESS_PCT_MAX_DICT`` maps
        'foo' to 51. 'foo' has 50 of 100 records stale and 'bar' has 3 of 100;
        no exception is asserted, so the test passes only when the validation
        call returns without raising.
        """
        with patch.object(GraphDatabase, 'driver'):
            task = Neo4jStalenessRemovalTask()
            settings = {
                'job.identifier': 'remove_stale_data_job',
                neo4j_staleness_removal_task.NEO4J_END_POINT_KEY: 'foobar',
                neo4j_staleness_removal_task.NEO4J_USER: '******',
                neo4j_staleness_removal_task.NEO4J_PASSWORD: '******',
                neo4j_staleness_removal_task.STALENESS_MAX_PCT: 5,
                neo4j_staleness_removal_task.STALENESS_PCT_MAX_DICT: {'foo': 51},
                neo4j_csv_publisher.JOB_PUBLISH_TAG: 'foo',
            }
            task.init(ConfigFactory.from_dict(settings))

            all_counts = [
                {'type': 'foo', 'count': 100},
                {'type': 'bar', 'count': 100},
            ]
            stale_counts = [
                {'type': 'foo', 'count': 50},
                {'type': 'bar', 'count': 3},
            ]
            task._validate_staleness_pct(total_records=all_counts,
                                         stale_records=stale_counts,
                                         types={'foo', 'bar'})
Пример #14
0
    def test_validation_failure(self) -> None:
        """Run ``_validate_staleness_pct`` with 50 of 100 'foo' records stale.

        ``STALENESS_MAX_PCT`` is configured as 90 and no exception is asserted,
        so despite its name this test only passes when the validation call
        returns without raising.
        """
        with patch.object(GraphDatabase, 'driver'):
            task = Neo4jStalenessRemovalTask()
            scope = task.get_scope()
            settings = {
                'job.identifier': 'remove_stale_data_job',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_END_POINT_KEY}': 'foobar',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_USER}': 'foo',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_PASSWORD}': 'bar',
                f'{scope}.{neo4j_staleness_removal_task.STALENESS_MAX_PCT}': 90,
                neo4j_csv_publisher.JOB_PUBLISH_TAG: 'foo',
            }
            task.init(ConfigFactory.from_dict(settings))

            task._validate_staleness_pct(total_record_count=100,
                                         stale_record_count=50,
                                         target_type='foo')
Пример #15
0
    def test_delete_statement_ms_to_expire(self) -> None:
        """Check the delete statements issued when ``MS_TO_EXPIRE`` is set.

        After running both delete steps, ``_execute_cypher_query`` must have
        been called with ``dry_run=False``, a ``marker`` of 9876543210, a
        ``batch_size`` of 100, and statements that compare
        ``publisher_last_updated_epoch_ms`` against ``timestamp() - $marker``.
        """
        with patch.object(GraphDatabase, 'driver'), \
                patch.object(Neo4jStalenessRemovalTask, '_execute_cypher_query') as mock_execute:
            # Every mocked query reports a count of 0 through ``.single()``.
            mock_execute.return_value.single.return_value = {'count': 0}

            task = Neo4jStalenessRemovalTask()
            scope = task.get_scope()
            settings = {
                'job.identifier': 'remove_stale_data_job',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_END_POINT_KEY}': 'foobar',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_USER}': 'foo',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_PASSWORD}': 'bar',
                f'{scope}.{neo4j_staleness_removal_task.STALENESS_MAX_PCT}': 5,
                f'{scope}.{neo4j_staleness_removal_task.TARGET_NODES}': ['Foo'],
                f'{scope}.{neo4j_staleness_removal_task.TARGET_RELATIONS}': ['BAR'],
                f'{scope}.{neo4j_staleness_removal_task.MS_TO_EXPIRE}': 9876543210,
            }
            task.init(ConfigFactory.from_dict(settings))

            task._delete_stale_nodes()
            task._delete_stale_relations()

            expected_node_statement = textwrap.dedent("""
            MATCH (target:Foo)
            WHERE (target.publisher_last_updated_epoch_ms < (timestamp() - $marker)
            OR NOT EXISTS(target.publisher_last_updated_epoch_ms))
            WITH target LIMIT $batch_size
            DETACH DELETE (target)
            RETURN count(*) as count
            """)
            expected_relation_statement = textwrap.dedent("""
            MATCH (start_node)-[target:BAR]-(end_node)
            WHERE (target.publisher_last_updated_epoch_ms < (timestamp() - $marker)
            OR NOT EXISTS(target.publisher_last_updated_epoch_ms))
            WITH target LIMIT $batch_size
            DELETE target
            RETURN count(*) as count
            """)

            for expected_statement in (expected_node_statement, expected_relation_statement):
                mock_execute.assert_any_call(dry_run=False,
                                             param_dict={'marker': 9876543210, 'batch_size': 100},
                                             statement=expected_statement)
Пример #16
0
    def test_validation_statement_publish_tag(self) -> None:
        """Check the validation queries issued when a publish tag is configured.

        Node validation must issue a total count for ``Foo`` (``WHERE true``)
        and a stale count filtered on ``published_tag``; relation validation
        must issue a stale count for ``BAR`` with the same filter. The stale
        queries carry ``{'marker': 'foo'}`` as parameters.
        """
        with patch.object(GraphDatabase, 'driver'), \
                patch.object(Neo4jStalenessRemovalTask, '_execute_cypher_query') as mock_execute:
            task = Neo4jStalenessRemovalTask()
            scope = task.get_scope()
            settings = {
                'job.identifier': 'remove_stale_data_job',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_END_POINT_KEY}': 'foobar',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_USER}': 'foo',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_PASSWORD}': 'bar',
                f'{scope}.{neo4j_staleness_removal_task.STALENESS_MAX_PCT}': 5,
                f'{scope}.{neo4j_staleness_removal_task.TARGET_NODES}': ['Foo'],
                f'{scope}.{neo4j_staleness_removal_task.TARGET_RELATIONS}': ['BAR'],
                neo4j_csv_publisher.JOB_PUBLISH_TAG: 'foo',
            }
            task.init(ConfigFactory.from_dict(settings))

            total_nodes_statement = textwrap.dedent("""
            MATCH (target:Foo)
            WHERE true
            RETURN count(*) as count
            """)
            stale_nodes_statement = textwrap.dedent("""
            MATCH (target:Foo)
            WHERE (target.published_tag < $marker
            OR NOT EXISTS(target.published_tag))
            RETURN count(*) as count
            """)
            stale_relations_statement = textwrap.dedent("""
            MATCH (start_node)-[target:BAR]-(end_node)
            WHERE (target.published_tag < $marker
            OR NOT EXISTS(target.published_tag))
            RETURN count(*) as count
            """)

            task._validate_node_staleness_pct()
            mock_execute.assert_called()
            mock_execute.assert_any_call(statement=total_nodes_statement)
            mock_execute.assert_any_call(param_dict={'marker': u'foo'},
                                         statement=stale_nodes_statement)

            task._validate_relation_staleness_pct()
            mock_execute.assert_any_call(param_dict={'marker': u'foo'},
                                         statement=stale_relations_statement)
Пример #17
0
    def test_delete_statement_with_target_condition(self) -> None:
        """Check that target conditions are appended to the delete statements.

        Both targets are ``TargetWithCondition`` instances. The expected node
        and relation statements carry the staleness filter on
        ``published_tag`` followed by ``AND`` and the target's own condition.
        """
        with patch.object(GraphDatabase, 'driver'), \
                patch.object(Neo4jStalenessRemovalTask, '_execute_cypher_query') as mock_execute:
            # Every mocked query reports a count of 0 through ``.single()``.
            mock_execute.return_value.single.return_value = {'count': 0}

            task = Neo4jStalenessRemovalTask()
            scope = task.get_scope()
            node_target = TargetWithCondition('Foo', '(target)-[:BAR]->(:Foo) AND target.name=\'foo_name\'')
            relation_target = TargetWithCondition('BAR', '(start_node:Foo)-[target]->(end_node:Foo)')
            settings = {
                'job.identifier': 'remove_stale_data_job',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_END_POINT_KEY}': 'foobar',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_USER}': 'foo',
                f'{scope}.{neo4j_staleness_removal_task.NEO4J_PASSWORD}': 'bar',
                f'{scope}.{neo4j_staleness_removal_task.STALENESS_MAX_PCT}': 5,
                f'{scope}.{neo4j_staleness_removal_task.TARGET_NODES}': [node_target],
                f'{scope}.{neo4j_staleness_removal_task.TARGET_RELATIONS}': [relation_target],
                neo4j_csv_publisher.JOB_PUBLISH_TAG: 'foo',
            }
            task.init(ConfigFactory.from_dict(settings))

            task._delete_stale_nodes()
            task._delete_stale_relations()

            expected_node_statement = textwrap.dedent("""
            MATCH (target:Foo)
            WHERE (target.published_tag < $marker
            OR NOT EXISTS(target.published_tag)) AND (target)-[:BAR]->(:Foo) AND target.name=\'foo_name\'
            WITH target LIMIT $batch_size
            DETACH DELETE (target)
            RETURN count(*) as count
            """)
            expected_relation_statement = textwrap.dedent("""
            MATCH (start_node)-[target:BAR]-(end_node)
            WHERE (target.published_tag < $marker
            OR NOT EXISTS(target.published_tag)) AND (start_node:Foo)-[target]->(end_node:Foo)
            WITH target LIMIT $batch_size
            DELETE target
            RETURN count(*) as count
            """)

            for expected_statement in (expected_node_statement, expected_relation_statement):
                mock_execute.assert_any_call(dry_run=False,
                                             param_dict={'marker': u'foo', 'batch_size': 100},
                                             statement=expected_statement)