import io
import os
import pickle
from queue import Queue
from threading import Thread

import boto3

# S3FileManager, S3FileHandle, DagsterInstance, run_step_from_ref,
# PICKLED_EVENTS_FILE_NAME, event_writing_loop, and DONE are assumed to come
# from dagster / dagster_aws; the exact import paths vary by version.


def main(step_run_ref_bucket, s3_dir_key):
    session = boto3.client("s3")
    file_manager = S3FileManager(session, step_run_ref_bucket, "")
    file_handle = S3FileHandle(step_run_ref_bucket, s3_dir_key)
    step_run_ref_data = file_manager.read_data(file_handle)
    step_run_ref = pickle.loads(step_run_ref_data)

    events_bucket = step_run_ref_bucket
    events_s3_key = os.path.dirname(s3_dir_key) + "/" + PICKLED_EVENTS_FILE_NAME

    def put_events(events):
        file_obj = io.BytesIO(pickle.dumps(events))
        session.put_object(Body=file_obj, Bucket=events_bucket, Key=events_s3_key)

    # Set up a thread to handle writing events back to the plan process, so execution
    # doesn't get blocked on remote communication.
    events_queue = Queue()
    event_writing_thread = Thread(
        target=event_writing_loop,
        kwargs=dict(events_queue=events_queue, put_events_fn=put_events),
    )
    event_writing_thread.start()

    with DagsterInstance.ephemeral() as instance:
        try:
            for event in run_step_from_ref(step_run_ref, instance):
                events_queue.put(event)
        finally:
            events_queue.put(DONE)
            event_writing_thread.join()
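The main above relies on an event_writing_loop helper and a DONE sentinel that are not shown. A minimal sketch of what they might look like, assuming DONE is a module-level sentinel object and that the loop re-uploads the cumulative event list each time a new event arrives (the real implementation may batch or rate-limit writes):

DONE = object()


def event_writing_loop(events_queue, put_events_fn):
    # Accumulate events as they stream in from the step, re-uploading the full
    # pickled list after each one so the plan process can poll for progress.
    events_so_far = []
    while True:
        event_or_done = events_queue.get()
        if event_or_done is DONE:
            break
        events_so_far.append(event_or_done)
        put_events_fn(events_so_far)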
import os
from unittest import mock

# S3FileManager and S3FileHandle are assumed to come from dagster_aws.s3.


def test_s3_file_manager_read():
    state = {'called': 0}
    bar_bytes = 'bar'.encode()

    class S3Mock(mock.MagicMock):
        def download_file(self, *_args, **kwargs):
            state['called'] += 1
            assert state['called'] == 1
            state['bucket'] = kwargs.get('Bucket')
            state['key'] = kwargs.get('Key')
            file_name = kwargs.get('Filename')
            state['file_name'] = file_name
            with open(file_name, 'wb') as ff:
                ff.write(bar_bytes)

    s3_mock = S3Mock()
    file_manager = S3FileManager(s3_mock, 'some-bucket', 'some-key')
    file_handle = S3FileHandle('some-bucket', 'some-key/kdjfkjdkfjkd')
    with file_manager.read(file_handle) as file_obj:
        assert file_obj.read() == bar_bytes

    assert state['bucket'] == file_handle.s3_bucket
    assert state['key'] == file_handle.s3_key

    # Read again; the second read is served from the local cache, so
    # download_file is not called a second time (enforced by the assert above).
    with file_manager.read(file_handle) as file_obj:
        assert file_obj.read() == bar_bytes

    assert os.path.exists(state['file_name'])
    file_manager.delete_local_temp()
    assert not os.path.exists(state['file_name'])
def get_step_events(self, step_context, run_id, step_key):
    events_file_handle = S3FileHandle(
        self.staging_bucket,
        self._artifact_s3_key(run_id, step_key, PICKLED_EVENTS_FILE_NAME),
    )
    events_data = step_context.file_manager.read_data(events_file_handle)
    return pickle.loads(events_data)
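get_step_events depends on an _artifact_s3_key helper that is not shown here. A plausible sketch, assuming the launcher keeps a staging_prefix attribute and keys artifacts by run and step (the actual key layout is an assumption, not the real implementation):

def _artifact_s3_key(self, run_id, step_key, filename):
    # Hypothetical layout: <staging_prefix>/<run_id>/<step_key>/<filename>
    return '/'.join([self.staging_prefix, run_id, step_key, os.path.basename(filename)])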
def main(step_run_ref_bucket, s3_dir_key):
    session = boto3.client('s3')
    file_manager = S3FileManager(session, step_run_ref_bucket, '')
    file_handle = S3FileHandle(step_run_ref_bucket, s3_dir_key)
    step_run_ref_data = file_manager.read_data(file_handle)
    step_run_ref = pickle.loads(step_run_ref_data)

    # Unlike the threaded variant above, this version runs the step to completion
    # and uploads the pickled event list in a single put_object call at the end.
    events = list(run_step_from_ref(step_run_ref))
    file_obj = io.BytesIO(pickle.dumps(events))
    events_key = os.path.dirname(s3_dir_key) + '/' + PICKLED_EVENTS_FILE_NAME
    session.put_object(Body=file_obj, Bucket=step_run_ref_bucket, Key=events_key)
def get_file_handle(self, file_key):
    check.str_param(file_key, 'file_key')
    return S3FileHandle(self.s3_bucket, self.get_full_key(file_key))
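For illustration, a hedged usage example, assuming get_full_key joins the manager's base key and the file key with a '/' (the bucket and key names here are made up):

import boto3

file_manager = S3FileManager(boto3.client('s3'), 'some-bucket', 'some-prefix')
file_handle = file_manager.get_file_handle('my_file.pkl')

assert file_handle.s3_bucket == 'some-bucket'
assert file_handle.s3_key == 'some-prefix/my_file.pkl'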