def test_invalid_object_path(self, mock_parent_init):
        """Verify that a bucket-only GCS URI is rejected as an object path.

        ``mock_parent_init`` is configured to return ``None``; it is presumably
        injected by a ``mock.patch`` decorator that is not visible here.
        """
        mock_parent_init.return_value = None

        # Only a bucket is named here, with no object, so the path is invalid.
        bucket_only_path = 'gs://test-bucket'
        expected_message = (
            'Invalid Google Cloud Storage (GCS) object path: {}'.format(
                bucket_only_path))

        helper = GoogleCloudBucketHelper()
        helper._gcs_hook = mock.Mock()

        with self.assertRaises(Exception) as raised:
            helper.google_cloud_to_local(bucket_only_path)

        self.assertEqual(expected_message, str(raised.exception))
Exemplo n.º 2
0
    def test_invalid_object_path(self, mock_parent_init):
        """A URI naming only a bucket must raise the 'invalid path' error.

        ``mock_parent_init`` has its return value set to ``None`` before the
        helper is constructed (presumably a patched parent initializer; the
        patch decorator is outside this snippet).
        """
        mock_parent_init.return_value = None
        helper_under_test = GoogleCloudBucketHelper()
        helper_under_test._gcs_hook = mock.Mock()

        # No object component follows the bucket name, hence an invalid file.
        uri = 'gs://test-bucket'

        with self.assertRaises(Exception) as caught:
            helper_under_test.google_cloud_to_local(uri)

        actual_message = str(caught.exception)
        self.assertEqual(
            'Invalid Google Cloud Storage (GCS) object path: {}'.format(uri),
            actual_message)
Exemplo n.º 3
0
 def execute(self, context):
     """Run the Python pipeline file through ``GFWDataFlowHook``.

     ``self.py_file`` is replaced by whatever
     ``GoogleCloudBucketHelper.google_cloud_to_local`` returns for it. The
     default options are copied, overlaid with ``self.options``, and their
     keys rewritten from lowerCamelCase to snake_case before the hook's
     ``start_python_dataflow`` is called.

     :param context: task context; not used by this method.
     """
     def _snake_case(name):
         # Each ASCII capital letter becomes "_" plus its lowercase form.
         return re.sub(
             r'[A-Z]', lambda match: '_' + match.group(0).lower(), name)

     gcs_helper = GoogleCloudBucketHelper(self.gcp_conn_id, self.delegate_to)
     self.py_file = gcs_helper.google_cloud_to_local(self.py_file)

     dataflow_hook = GFWDataFlowHook(
         gcp_conn_id=self.gcp_conn_id,
         delegate_to=self.delegate_to,
         poll_sleep=self.poll_sleep)

     # Operator-level options take precedence over the defaults.
     merged_options = self.dataflow_default_options.copy()
     merged_options.update(self.options)

     snake_case_options = {}
     for option_name in merged_options:
         snake_case_options[_snake_case(option_name)] = (
             merged_options[option_name])

     dataflow_hook.start_python_dataflow(
         self.job_name, snake_case_options, self.py_file, self.py_options)
    def test_valid_object(self, mock_parent_init):
        """Check that a well-formed object URI yields a path naming the object.

        The hook's ``download`` is replaced by a fake that writes non-empty
        text to the requested local file. ``mock_parent_init`` is set to
        return ``None`` (presumably a patched parent initializer; the patch
        decorator is not visible here).
        """
        mock_parent_init.return_value = None
        gcs_uri = 'gs://test-bucket/path/to/obj.jar'

        def _fake_download(bucket, object, filename=None):
            # Parameter names are kept as-is: the code under test may pass
            # them by keyword (not visible from here).
            payload = 'text file contents'
            with open(filename, 'w') as handle:
                handle.write(payload)
            return payload

        helper = GoogleCloudBucketHelper()
        helper._gcs_hook = mock.Mock()
        helper._gcs_hook.download.side_effect = _fake_download

        downloaded_path = helper.google_cloud_to_local(gcs_uri)
        self.assertIn('obj.jar', downloaded_path)
Exemplo n.º 5
0
    def test_valid_object(self, mock_parent_init):
        """A valid ``gs://bucket/object`` URI should resolve to a local file.

        ``download`` on the mocked hook writes a small text payload to the
        target filename; the returned local path must contain ``obj.jar``.
        ``mock_parent_init`` is configured to return ``None`` (its origin, a
        patch decorator, is outside this snippet).
        """
        def _write_text_payload(bucket, object, filename=None):
            # Signature mirrors the original fake so keyword calls still bind.
            contents = 'text file contents'
            with open(filename, 'w') as out_file:
                out_file.write(contents)
            return contents

        mock_parent_init.return_value = None

        helper_under_test = GoogleCloudBucketHelper()
        hook_mock = mock.Mock()
        helper_under_test._gcs_hook = hook_mock
        hook_mock.download.side_effect = _write_text_payload

        source_uri = 'gs://test-bucket/path/to/obj.jar'
        result_path = helper_under_test.google_cloud_to_local(source_uri)
        self.assertIn('obj.jar', result_path)
Exemplo n.º 6
0
 def execute(self, context):
     """Execute the python dataflow job with the ``python3`` interpreter.

     ``self.py_file`` is passed through
     ``GoogleCloudBucketHelper.google_cloud_to_local`` and reassigned, the
     default and operator options are merged, and the job is started via
     ``DataFlow3Hook.start_python_dataflow``.

     :param context: task context; not used by this method.
     """
     def _camel_to_snake(name):
         # lowerCamelCase -> snake_case: "_" is inserted before each ASCII
         # capital, which is then lowercased.
         return re.sub(
             r'[A-Z]', lambda hit: '_' + hit.group(0).lower(), name)

     helper = GoogleCloudBucketHelper(self.gcp_conn_id, self.delegate_to)
     local_py_file = helper.google_cloud_to_local(self.py_file)
     self.py_file = local_py_file

     hook_kwargs = {
         'gcp_conn_id': self.gcp_conn_id,
         'delegate_to': self.delegate_to,
         'poll_sleep': self.poll_sleep,
     }
     hook = DataFlow3Hook(**hook_kwargs)

     # Start from the defaults, then let self.options override them.
     options = self.dataflow_default_options.copy()
     options.update(self.options)
     snake_options = dict(
         (_camel_to_snake(key), options[key]) for key in options)

     hook.start_python_dataflow(
         self.job_name,
         snake_options,
         self.py_file,
         self.py_options,
         py_interpreter="python3")
Exemplo n.º 7
0
    def execute_direct_runner(self, context):
        """Start the Python pipeline through ``DataFlowDirectRunnerHook``.

        ``self.py_file`` is replaced by the result of
        ``GoogleCloudBucketHelper.google_cloud_to_local``. The default options
        overlaid with ``self.options`` are passed to the hook with snake_case
        keys, using ``self.task_id`` as the first argument.

        :param context: task context; not used by this method.
        """
        def _to_snake(name):
            # Rewrite lowerCamelCase as snake_case, one capital at a time.
            return re.sub(
                r'[A-Z]', lambda found: '_' + found.group(0).lower(), name)

        gcs_helper = GoogleCloudBucketHelper(
            self.gcp_conn_id, self.delegate_to)
        self.py_file = gcs_helper.google_cloud_to_local(self.py_file)

        combined_options = self.dataflow_default_options.copy()
        combined_options.update(self.options)
        snake_case_options = {}
        for name in combined_options:
            snake_case_options[_to_snake(name)] = combined_options[name]

        runner_hook = DataFlowDirectRunnerHook(
            gcp_conn_id=self.gcp_conn_id, delegate_to=self.delegate_to)
        runner_hook.start_python_dataflow(
            self.task_id, snake_case_options, self.py_file, self.py_options)
    def test_empty_object(self, mock_parent_init):
        """Check that an empty downloaded file is reported as a failure.

        The fake ``download`` writes an empty string to the local file, and
        the helper is expected to raise with the 'Failed to download' message.
        ``mock_parent_init`` is set to return ``None`` (presumably a patched
        parent initializer; the patch decorator is not visible here).
        """
        mock_parent_init.return_value = None
        gcs_uri = 'gs://test-bucket/path/to/obj.jar'
        expected_message = (
            'Failed to download Google Cloud Storage (GCS) object: {}'.format(
                gcs_uri))

        def _fake_empty_download(bucket, object, filename=None):
            # Parameter names are kept as-is: the code under test may pass
            # them by keyword (not visible from here).
            payload = ''
            with open(filename, 'w') as handle:
                handle.write(payload)
            return payload

        helper = GoogleCloudBucketHelper()
        helper._gcs_hook = mock.Mock()
        helper._gcs_hook.download.side_effect = _fake_empty_download

        with self.assertRaises(Exception) as raised:
            helper.google_cloud_to_local(gcs_uri)

        self.assertEqual(expected_message, str(raised.exception))
Exemplo n.º 9
0
    def test_empty_object(self, mock_parent_init):
        """Downloading an object that produces an empty file must raise.

        ``download`` on the mocked hook creates the local file with no
        content; the resulting exception text is compared with the
        'Failed to download' message for the same URI. ``mock_parent_init``
        is configured to return ``None`` (its patch decorator is outside this
        snippet).
        """
        def _write_nothing(bucket, object, filename=None):
            # Signature mirrors the original fake so keyword calls still bind.
            contents = ''
            with open(filename, 'w') as out_file:
                out_file.write(contents)
            return contents

        mock_parent_init.return_value = None

        helper_under_test = GoogleCloudBucketHelper()
        hook_mock = mock.Mock()
        helper_under_test._gcs_hook = hook_mock
        hook_mock.download.side_effect = _write_nothing

        source_uri = 'gs://test-bucket/path/to/obj.jar'
        with self.assertRaises(Exception) as caught:
            helper_under_test.google_cloud_to_local(source_uri)

        actual_message = str(caught.exception)
        self.assertEqual(
            'Failed to download Google Cloud Storage (GCS) object: {}'.format(
                source_uri),
            actual_message)
    def execute(self, context):
        """Build a ``java -jar`` command for ``self.jar`` and run it.

        ``self.jar`` is passed through
        ``GoogleCloudBucketHelper.google_cloud_to_local`` and reassigned.
        Each entry of ``self.options`` (when it is not ``None``) is appended
        to the command as `` -<name> "<value>"``. The command is stored in
        ``self.bash_command`` and the parent class's ``execute`` is invoked.
        Start, completion and failure are logged with the correlation id.

        :param context: passed unchanged to the parent class's ``execute``.
        :raises Exception: any exception raised while preparing or running
            the command is logged with ``status=KO`` and then re-raised.
        """
        try:
            self.log.info("| correlationId={0} | op=jar-execution | status=OK | desc=Started processing task '{1}'".
                          format(self.correlation_id, self.task_id))
            bucket_helper = GoogleCloudBucketHelper(self.gcp_conn_id, self.delegate_to)
            self.jar = bucket_helper.google_cloud_to_local(self.jar)
            command = 'java -jar {0}'.format(self.jar)

            if self.options is not None:
                # items() exists on both Python 2 and 3; iteritems() is
                # Python 2 only and raises AttributeError on Python 3.
                # NOTE(review): values are wrapped in double quotes without
                # escaping, so a value containing '"' would break the
                # command - confirm option values are trusted.
                for attr, value in self.options.items():
                    command += " -" + attr + " \"" + value + "\""
            self.bash_command = command

            super(JVMOperator, self).execute(context)

            self.log.info("| correlationId={0} | op=jar-execution | status=OK | desc=Completed processing task '{1}'".
                          format(self.correlation_id, self.task_id))
        except Exception as exception:
            self.log.error(
                "| correlationId={0} | op=jar-execution | status=KO | desc=Failed to process the task '{1}' with exception '{2}'".
                    format(self.correlation_id, self.task_id, exception))
            # A bare raise re-raises the active exception and keeps its
            # original traceback.
            raise