def execute(self, context: Dict):
    """
    Run the Salesforce query, write the records to a temporary file and upload it to GCS.

    :param context: The task context during execution.
    :return: The ``gs://`` URI of the uploaded object.
    """
    sf_hook = SalesforceHook(conn_id=self.salesforce_conn_id)
    query_response = sf_hook.make_query(
        query=self.query,
        include_deleted=self.include_deleted,
        query_params=self.query_params,
    )

    # The temporary directory (and the export inside it) only lives for the
    # duration of the ``with`` block, so the upload has to happen inside it.
    with tempfile.TemporaryDirectory() as tmp_dir:
        local_path = os.path.join(tmp_dir, "salesforce_temp_file")
        sf_hook.write_object_to_file(
            query_results=query_response["records"],
            filename=local_path,
            fmt=self.export_format,
            coerce_to_timestamp=self.coerce_to_timestamp,
            record_time_added=self.record_time_added,
        )

        gcs_hook = GCSHook(gcp_conn_id=self.gcp_conn_id)
        gcs_hook.upload(
            bucket_name=self.bucket_name,
            object_name=self.object_name,
            filename=local_path,
            gzip=self.gzip,
        )

        gcs_uri = "gs://{}/{}".format(self.bucket_name, self.object_name)
        self.log.info("%s uploaded to GCS", gcs_uri)
        return gcs_uri
def execute(self, context: Dict) -> str:
    """
    Run the Salesforce query, write the records to a temporary file and load it into S3.

    :param context: The task context during execution.
    :return: The ``s3://`` URI of the uploaded key.
    """
    salesforce_hook = SalesforceHook(salesforce_conn_id=self.salesforce_conn_id)
    response = salesforce_hook.make_query(
        query=self.salesforce_query,
        include_deleted=self.include_deleted,
        query_params=self.query_params,
    )

    # The exported file only exists while the temporary directory is open,
    # so the S3 upload must happen inside the ``with`` block.
    with tempfile.TemporaryDirectory() as tmp:
        path = os.path.join(tmp, "salesforce_temp_file")
        salesforce_hook.write_object_to_file(
            query_results=response["records"],
            filename=path,
            fmt=self.export_format,
            coerce_to_timestamp=self.coerce_to_timestamp,
            record_time_added=self.record_time_added,
        )

        s3_hook = S3Hook(aws_conn_id=self.aws_conn_id)
        s3_hook.load_file(
            filename=path,
            key=self.s3_key,
            bucket_name=self.s3_bucket_name,
            replace=self.replace,
            encrypt=self.encrypt,
            gzip=self.gzip,
            acl_policy=self.acl_policy,
        )

        s3_uri = f"s3://{self.s3_bucket_name}/{self.s3_key}"
        # Pass the URI as a logger argument so interpolation is deferred to the
        # logging framework; the rendered message is unchanged.
        self.log.info("Salesforce data uploaded to S3 at %s.", s3_uri)
        return s3_uri
def execute(self, context: dict) -> dict:
    """
    Call an APEX REST endpoint and optionally hand the response to XCom.

    :param context: The task context during execution.
    :type context: dict
    :return: The Apex response when ``do_xcom_push`` is truthy, otherwise ``None``.
    :rtype: dict
    """
    connection = SalesforceHook(salesforce_conn_id=self.salesforce_conn_id).get_conn()
    apex_response = connection.apexecute(
        action=self.endpoint,
        method=self.method,
        data=self.payload,
    )

    # The request is always issued; only the return value depends on the flag.
    if not self.do_xcom_push:
        return None
    return apex_response
def execute(self, context: 'Context'):
    """
    Run the configured operation against the Salesforce Bulk API.

    :param context: The task context during execution.
    :return: The API response when ``do_xcom_push`` is truthy and the response
        is non-empty, otherwise ``None``.
    """
    connection = SalesforceHook(salesforce_conn_id=self.salesforce_conn_id).get_conn()

    bulk_result = []
    supported_operations = ('insert', 'update', 'upsert', 'delete', 'hard_delete')
    # Any other operation value performs no bulk call and leaves the result empty.
    if self.operation in supported_operations:
        call_kwargs = {
            'data': self.payload,
            'batch_size': self.batch_size,
            'use_serial': self.use_serial,
        }
        # Only upsert takes the external id field.
        if self.operation == 'upsert':
            call_kwargs['external_id_field'] = self.external_id_field

        bulk_object = connection.bulk.__getattr__(self.object_name)
        bulk_result = getattr(bulk_object, self.operation)(**call_kwargs)

    if self.do_xcom_push and bulk_result:
        return bulk_result
    return None
def setUp(self):
    """Build the ``SalesforceHook`` instance stored on ``self.salesforce_hook``."""
    hook = SalesforceHook(conn_id='conn_id')
    self.salesforce_hook = hook
class TestSalesforceHook(unittest.TestCase):
    """Tests for ``SalesforceHook`` with the Salesforce client and pandas output mocked."""

    def setUp(self):
        """Create the hook under test."""
        self.salesforce_hook = SalesforceHook(conn_id='conn_id')

    def test_get_conn_exists(self):
        """Call ``get_conn`` on a hook whose ``conn`` attribute is already set."""
        hook = self.salesforce_hook
        hook.conn = Mock(spec=Salesforce)

        hook.get_conn()

        self.assertIsNotNone(hook.conn.return_value)

    @patch(
        'airflow.providers.salesforce.hooks.salesforce.SalesforceHook.get_connection',
        return_value=Connection(
            login='******',
            password='******',
            extra='{"security_token": "token", "sandbox": "true"}',
        ),
    )
    @patch('airflow.providers.salesforce.hooks.salesforce.Salesforce')
    def test_get_conn(self, mock_salesforce, mock_get_connection):
        """``get_conn`` builds the client from the connection's credentials and extras."""
        self.salesforce_hook.get_conn()

        connection = mock_get_connection.return_value
        extras = connection.extra_dejson
        self.assertEqual(self.salesforce_hook.conn, mock_salesforce.return_value)
        mock_salesforce.assert_called_once_with(
            username=connection.login,
            password=connection.password,
            security_token=extras['security_token'],
            instance_url=connection.host,
            sandbox=extras.get('sandbox', False),
        )

    @patch('airflow.providers.salesforce.hooks.salesforce.Salesforce')
    def test_make_query(self, mock_salesforce):
        """``make_query`` forwards the query to ``query_all`` and returns its result."""
        sf_client = mock_salesforce.return_value
        sf_client.query_all.return_value = {'totalSize': 123, 'done': True}
        self.salesforce_hook.conn = sf_client
        soql = 'SELECT * FROM table'

        returned = self.salesforce_hook.make_query(soql, include_deleted=True)

        sf_client.query_all.assert_called_once_with(soql, include_deleted=True)
        self.assertEqual(returned, sf_client.query_all.return_value)

    @patch('airflow.providers.salesforce.hooks.salesforce.Salesforce')
    def test_describe_object(self, mock_salesforce):
        """``describe_object`` calls ``describe`` on the named object and returns the result."""
        object_name = 'obj_name'
        sf_client = mock_salesforce.return_value
        sf_client.__setattr__(object_name, Mock(spec=Salesforce))
        self.salesforce_hook.conn = sf_client

        description = self.salesforce_hook.describe_object(object_name)

        describe_mock = sf_client.__getattr__(object_name).describe
        describe_mock.assert_called_once_with()
        self.assertEqual(description, describe_mock.return_value)

    @patch('airflow.providers.salesforce.hooks.salesforce.SalesforceHook.get_conn')
    @patch(
        'airflow.providers.salesforce.hooks.salesforce.SalesforceHook.describe_object',
        return_value={'fields': [{'name': 'field_1'}, {'name': 'field_2'}]},
    )
    def test_get_available_fields(self, mock_describe_object, mock_get_conn):
        """``get_available_fields`` returns the field names from the object description."""
        object_name = 'obj_name'

        field_names = self.salesforce_hook.get_available_fields(object_name)

        mock_get_conn.assert_called_once_with()
        mock_describe_object.assert_called_once_with(object_name)
        self.assertEqual(field_names, ['field_1', 'field_2'])

    @patch('airflow.providers.salesforce.hooks.salesforce.SalesforceHook.make_query')
    def test_get_object_from_salesforce(self, mock_make_query):
        """``get_object_from_salesforce`` issues a SELECT over the requested fields."""
        requested_fields = ['field_1', 'field_2']

        fetched = self.salesforce_hook.get_object_from_salesforce(
            obj='obj_name',
            fields=requested_fields,
        )

        mock_make_query.assert_called_once_with("SELECT field_1,field_2 FROM obj_name")
        self.assertEqual(fetched, mock_make_query.return_value)

    def test_write_object_to_file_invalid_format(self):
        """An unknown ``fmt`` value raises ``ValueError``."""
        with self.assertRaises(ValueError):
            self.salesforce_hook.write_object_to_file(
                query_results=[],
                filename='test',
                fmt="test",
            )

    @patch(
        'airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records',
        return_value=pd.DataFrame({'test': [1, 2, 3]}),
    )
    def test_write_object_to_file_csv(self, mock_data_frame):
        """CSV export calls ``to_csv`` without the index and returns the frame."""
        to_csv_mock = Mock()
        mock_data_frame.return_value.to_csv = to_csv_mock
        target = 'test'

        written = self.salesforce_hook.write_object_to_file(
            query_results=[],
            filename=target,
            fmt="csv",
        )

        to_csv_mock.assert_called_once_with(target, index=False)
        expected = pd.DataFrame({'test': [1, 2, 3]})
        pd.testing.assert_frame_equal(written, expected)

    @patch(
        'airflow.providers.salesforce.hooks.salesforce.SalesforceHook.describe_object',
        return_value={'fields': [{'name': 'field_1', 'type': 'date'}]},
    )
    @patch(
        'airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records',
        return_value=pd.DataFrame({
            'test': [1, 2, 3],
            'field_1': ['2019-01-01', '2019-01-02', '2019-01-03'],
        }),
    )
    def test_write_object_to_file_json_with_timestamp_conversion(self, mock_data_frame, mock_describe_object):
        """JSON export with ``coerce_to_timestamp`` converts the date field to epoch values."""
        to_json_mock = Mock()
        mock_data_frame.return_value.to_json = to_json_mock
        target = 'test'
        object_name = 'obj_name'

        written = self.salesforce_hook.write_object_to_file(
            query_results=[{'attributes': {'type': object_name}}],
            filename=target,
            fmt="json",
            coerce_to_timestamp=True,
        )

        mock_describe_object.assert_called_once_with(object_name)
        to_json_mock.assert_called_once_with(target, "records", date_unit="s")
        expected = pd.DataFrame({
            'test': [1, 2, 3],
            'field_1': [1.546301e+09, 1.546387e+09, 1.546474e+09],
        })
        pd.testing.assert_frame_equal(written, expected)

    @patch('airflow.providers.salesforce.hooks.salesforce.time.time', return_value=1.23)
    @patch(
        'airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records',
        return_value=pd.DataFrame({'test': [1, 2, 3]}),
    )
    def test_write_object_to_file_ndjson_with_record_time(self, mock_data_frame, mock_time):
        """NDJSON export with ``record_time_added`` appends the fetch-time column."""
        to_json_mock = Mock()
        mock_data_frame.return_value.to_json = to_json_mock
        target = 'test'

        written = self.salesforce_hook.write_object_to_file(
            query_results=[],
            filename=target,
            fmt="ndjson",
            record_time_added=True,
        )

        to_json_mock.assert_called_once_with(target, "records", lines=True, date_unit="s")
        fetch_time = mock_time.return_value
        expected = pd.DataFrame({
            'test': [1, 2, 3],
            'time_fetched_from_salesforce': [fetch_time, fetch_time, fetch_time],
        })
        pd.testing.assert_frame_equal(written, expected)
class TestSalesforceHook(unittest.TestCase):
    """Tests for ``SalesforceHook`` with the Salesforce client and pandas output mocked."""

    def setUp(self):
        """Create the hook under test."""
        self.salesforce_hook = SalesforceHook(conn_id="conn_id")

    def test_get_conn_exists(self):
        """Call ``get_conn`` on a hook whose ``conn`` attribute is already set."""
        self.salesforce_hook.conn = Mock(spec=Salesforce)

        self.salesforce_hook.get_conn()

        self.assertIsNotNone(self.salesforce_hook.conn.return_value)

    @patch(
        "airflow.providers.salesforce.hooks.salesforce.SalesforceHook.get_connection",
        return_value=Connection(
            login="******",
            password="******",
            extra='{"security_token": "token", "domain": "test"}',
        ),
    )
    @patch("airflow.providers.salesforce.hooks.salesforce.Salesforce")
    def test_get_conn(self, mock_salesforce, mock_get_connection):
        """``get_conn`` builds the client from the connection's credentials and extras."""
        self.salesforce_hook.get_conn()

        connection = mock_get_connection.return_value
        self.assertEqual(self.salesforce_hook.conn, mock_salesforce.return_value)
        mock_salesforce.assert_called_once_with(
            username=connection.login,
            password=connection.password,
            security_token=connection.extra_dejson["security_token"],
            instance_url=connection.host,
            domain=connection.extra_dejson.get("domain", None),
        )

    @patch("airflow.providers.salesforce.hooks.salesforce.Salesforce")
    def test_make_query(self, mock_salesforce):
        """``make_query`` forwards the query to ``query_all`` and returns its result."""
        sf_client = mock_salesforce.return_value
        sf_client.query_all.return_value = dict(totalSize=123, done=True)
        self.salesforce_hook.conn = sf_client
        query = "SELECT * FROM table"

        query_results = self.salesforce_hook.make_query(query, include_deleted=True)

        sf_client.query_all.assert_called_once_with(query, include_deleted=True)
        self.assertEqual(query_results, sf_client.query_all.return_value)

    @patch("airflow.providers.salesforce.hooks.salesforce.Salesforce")
    def test_describe_object(self, mock_salesforce):
        """``describe_object`` calls ``describe`` on the named object and returns the result."""
        obj = "obj_name"
        sf_client = mock_salesforce.return_value
        # Use the builtins rather than invoking the dunder methods directly.
        setattr(sf_client, obj, Mock(spec=Salesforce))
        self.salesforce_hook.conn = sf_client

        obj_description = self.salesforce_hook.describe_object(obj)

        describe_mock = getattr(sf_client, obj).describe
        describe_mock.assert_called_once_with()
        self.assertEqual(obj_description, describe_mock.return_value)

    @patch("airflow.providers.salesforce.hooks.salesforce.SalesforceHook.get_conn")
    @patch(
        "airflow.providers.salesforce.hooks.salesforce.SalesforceHook.describe_object",
        return_value={"fields": [{"name": "field_1"}, {"name": "field_2"}]},
    )
    def test_get_available_fields(self, mock_describe_object, mock_get_conn):
        """``get_available_fields`` returns the field names from the object description."""
        obj = "obj_name"

        available_fields = self.salesforce_hook.get_available_fields(obj)

        mock_get_conn.assert_called_once_with()
        mock_describe_object.assert_called_once_with(obj)
        self.assertEqual(available_fields, ["field_1", "field_2"])

    @patch("airflow.providers.salesforce.hooks.salesforce.SalesforceHook.make_query")
    def test_get_object_from_salesforce(self, mock_make_query):
        """``get_object_from_salesforce`` issues a SELECT over the requested fields."""
        salesforce_objects = self.salesforce_hook.get_object_from_salesforce(
            obj="obj_name", fields=["field_1", "field_2"]
        )

        mock_make_query.assert_called_once_with("SELECT field_1,field_2 FROM obj_name")
        self.assertEqual(salesforce_objects, mock_make_query.return_value)

    def test_write_object_to_file_invalid_format(self):
        """An unknown ``fmt`` value raises ``ValueError``."""
        with self.assertRaises(ValueError):
            self.salesforce_hook.write_object_to_file(query_results=[], filename="test", fmt="test")

    @patch(
        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
        return_value=pd.DataFrame({"test": [1, 2, 3], "dict": [None, None, {"foo": "bar"}]}),
    )
    def test_write_object_to_file_csv(self, mock_data_frame):
        """CSV export calls ``to_csv`` without the index; the ``dict`` column comes back as strings."""
        mock_data_frame.return_value.to_csv = Mock()
        filename = "test"

        data_frame = self.salesforce_hook.write_object_to_file(
            query_results=[], filename=filename, fmt="csv"
        )

        mock_data_frame.return_value.to_csv.assert_called_once_with(filename, index=False)
        expected = pd.DataFrame(
            {"test": [1, 2, 3], "dict": ["None", "None", str({"foo": "bar"})]}
        )
        pd.testing.assert_frame_equal(data_frame, expected)

    @patch(
        "airflow.providers.salesforce.hooks.salesforce.SalesforceHook.describe_object",
        return_value={"fields": [{"name": "field_1", "type": "date"}]},
    )
    @patch(
        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
        return_value=pd.DataFrame(
            {"test": [1, 2, 3], "field_1": ["2019-01-01", "2019-01-02", "2019-01-03"]}
        ),
    )
    def test_write_object_to_file_json_with_timestamp_conversion(
        self, mock_data_frame, mock_describe_object
    ):
        """JSON export with ``coerce_to_timestamp`` converts the date field to epoch values."""
        mock_data_frame.return_value.to_json = Mock()
        filename = "test"
        obj_name = "obj_name"

        data_frame = self.salesforce_hook.write_object_to_file(
            query_results=[{"attributes": {"type": obj_name}}],
            filename=filename,
            fmt="json",
            coerce_to_timestamp=True,
        )

        mock_describe_object.assert_called_once_with(obj_name)
        mock_data_frame.return_value.to_json.assert_called_once_with(
            filename, "records", date_unit="s"
        )
        expected = pd.DataFrame(
            {"test": [1, 2, 3], "field_1": [1.546301e09, 1.546387e09, 1.546474e09]}
        )
        pd.testing.assert_frame_equal(data_frame, expected)

    @patch("airflow.providers.salesforce.hooks.salesforce.time.time", return_value=1.23)
    @patch(
        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
        return_value=pd.DataFrame({"test": [1, 2, 3]}),
    )
    def test_write_object_to_file_ndjson_with_record_time(self, mock_data_frame, mock_time):
        """NDJSON export with ``record_time_added`` appends the fetch-time column."""
        mock_data_frame.return_value.to_json = Mock()
        filename = "test"

        data_frame = self.salesforce_hook.write_object_to_file(
            query_results=[], filename=filename, fmt="ndjson", record_time_added=True
        )

        mock_data_frame.return_value.to_json.assert_called_once_with(
            filename, "records", lines=True, date_unit="s"
        )
        expected = pd.DataFrame(
            {
                "test": [1, 2, 3],
                "time_fetched_from_salesforce": [
                    mock_time.return_value,
                    mock_time.return_value,
                    mock_time.return_value,
                ],
            }
        )
        pd.testing.assert_frame_equal(data_frame, expected)

    @patch(
        "airflow.providers.salesforce.hooks.salesforce.SalesforceHook.describe_object",
        return_value={"fields": [{"name": "field_1", "type": "date"}]},
    )
    @patch(
        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
        return_value=pd.DataFrame(
            {"test": [1, 2, 3], "field_1": ["2019-01-01", "2019-01-02", "2019-01-03"]}
        ),
    )
    def test_object_to_df_with_timestamp_conversion(self, mock_data_frame, mock_describe_object):
        """``object_to_df`` with ``coerce_to_timestamp`` converts the date field to epoch values."""
        obj_name = "obj_name"

        data_frame = self.salesforce_hook.object_to_df(
            query_results=[{"attributes": {"type": obj_name}}],
            coerce_to_timestamp=True,
        )

        mock_describe_object.assert_called_once_with(obj_name)
        expected = pd.DataFrame(
            {"test": [1, 2, 3], "field_1": [1.546301e09, 1.546387e09, 1.546474e09]}
        )
        pd.testing.assert_frame_equal(data_frame, expected)

    @patch("airflow.providers.salesforce.hooks.salesforce.time.time", return_value=1.23)
    @patch(
        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
        return_value=pd.DataFrame({"test": [1, 2, 3]}),
    )
    def test_object_to_df_with_record_time(self, mock_data_frame, mock_time):
        """``object_to_df`` with ``record_time_added`` appends the fetch-time column."""
        data_frame = self.salesforce_hook.object_to_df(query_results=[], record_time_added=True)

        expected = pd.DataFrame(
            {
                "test": [1, 2, 3],
                "time_fetched_from_salesforce": [
                    mock_time.return_value,
                    mock_time.return_value,
                    mock_time.return_value,
                ],
            }
        )
        pd.testing.assert_frame_equal(data_frame, expected)