def __post_init__(self):
  """Backfills unset collection fields and records the IPython cell count.

  Replaces falsy ``schemas``/``evaluated`` values with fresh, independent
  sets so instances never share a mutable default. When running inside
  IPython, also captures the current cell execution count.
  """
  if not self.schemas:
    self.schemas = set()
  if not self.evaluated:
    self.evaluated = set()
  if is_in_ipython():
    # Import lazily so non-IPython environments never need the dependency.
    from IPython import get_ipython
    shell = get_ipython()
    self.execution_count = shell.execution_count
def __init__(self, cache_manager=None):
  """Initializes the interactive environment's per-kernel state.

  Args:
    cache_manager: Optional cache manager. When provided, a cleanup
      routine is registered via ``atexit`` so caches are released when
      the kernel is restarted or terminated.
  """
  self._cache_manager = cache_manager
  # Register a cleanup routine when kernel is restarted or terminated.
  if cache_manager:
    atexit.register(self.cleanup)
  # Holds class instances, module object, string of module names.
  self._watching_set = set()
  # Holds variables list of (Dict[str, object]).
  self._watching_dict_list = []
  # Holds results of main jobs as Dict[Pipeline, PipelineResult].
  # Each key is a pipeline instance defined by the end user. The
  # InteractiveRunner is responsible for populating this dictionary
  # implicitly.
  self._main_pipeline_results = {}
  # Holds results of background caching jobs as
  # Dict[Pipeline, PipelineResult]. Each key is a pipeline instance defined by
  # the end user. The InteractiveRunner is responsible for populating this
  # dictionary implicitly when a background caching jobs is started.
  self._background_caching_pipeline_results = {}
  self._cached_source_signature = {}
  self._tracked_user_pipelines = set()
  # Tracks the computation completeness of PCollections. PCollections tracked
  # here don't need to be re-computed when data introspection is needed.
  self._computed_pcolls = set()
  # Always watch __main__ module.
  self.watch('__main__')
  # Do a warning level logging if current python version is below 3.6.
  if sys.version_info < (3, 6):
    self._is_py_version_ready = False
    # Message kept consistent with the (3, 6) check above.
    _LOGGER.warning('Interactive Beam requires Python 3.6+.')
  else:
    self._is_py_version_ready = True
  # Check if [interactive] dependencies are installed.
  try:
    import IPython  # pylint: disable=unused-import
    import jsons  # pylint: disable=unused-import
    import timeloop  # pylint: disable=unused-import
    from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator  # pylint: disable=unused-import
    self._is_interactive_ready = True
  except ImportError:
    self._is_interactive_ready = False
    _LOGGER.warning(
        'Dependencies required for Interactive Beam PCollection '
        'visualization are not available, please use: `pip '
        'install apache-beam[interactive]` to install necessary '
        'dependencies to enable all data visualization features.')
  self._is_in_ipython = is_in_ipython()
  self._is_in_notebook = is_in_notebook()
  if not self._is_in_ipython:
    _LOGGER.warning(
        'You cannot use Interactive Beam features when you are '
        'not in an interactive environment such as a Jupyter '
        'notebook or ipython terminal.')
  if self._is_in_ipython and not self._is_in_notebook:
    # Fixed grammar: "connected to any" (was "connected any").
    _LOGGER.warning(
        'You have limited Interactive Beam features since your '
        'ipython kernel is not connected to any notebook frontend.')
def _inner(): options = self.to_options() # Caches the output_pcoll to a GCS bucket. try: execution_count = 0 if is_in_ipython(): from IPython import get_ipython execution_count = get_ipython().execution_count output_location = '{}/{}'.format( options.view_as( GoogleCloudOptions).staging_location, self.output_name) _ = self.output_pcoll | 'WriteOuput{}_{}ToGCS'.format( self.output_name, execution_count) >> WriteToText(output_location) _LOGGER.info( 'Data of output PCollection %s will be written to %s', self.output_name, output_location) except (KeyboardInterrupt, SystemExit): raise except: # pylint: disable=bare-except # The transform has been added before, noop. pass if self.verbose: _LOGGER.info( 'Running the pipeline on Dataflow with pipeline options %s.', pformat_dict(options.display_data())) result = create_runner('DataflowRunner').run_pipeline( self.p, options) cloud_options = options.view_as(GoogleCloudOptions) url = ( 'https://console.cloud.google.com/dataflow/jobs/%s/%s?project=%s' % (cloud_options.region, result.job_id(), cloud_options.project)) display( HTML( 'Click <a href="%s" target="_new">here</a> for the details ' 'of your Dataflow job.' % url)) result_name = 'result_{}'.format(self.output_name) create_var_in_main(result_name, result) if self.verbose: _LOGGER.info( 'The pipeline result of the run can be accessed from variable ' '%s. The current status is %s.', result_name, result)
def __init__(self, cache_manager=None):
  """Initializes the interactive environment's per-kernel state.

  Args:
    cache_manager: Optional cache manager. When provided, a cleanup
      routine is registered via ``atexit`` so caches are released when
      the kernel is restarted or terminated.
  """
  self._cache_manager = cache_manager
  # Register a cleanup routine when kernel is restarted or terminated.
  if cache_manager:
    atexit.register(self.cleanup)
  # Holds class instances, module object, string of module names.
  self._watching_set = set()
  # Holds variables list of (Dict[str, object]).
  self._watching_dict_list = []
  # Holds results of main jobs as Dict[Pipeline, PipelineResult].
  # Each key is a pipeline instance defined by the end user. The
  # InteractiveRunner is responsible for populating this dictionary
  # implicitly.
  self._main_pipeline_results = {}
  # Holds background caching jobs as Dict[Pipeline, BackgroundCachingJob].
  # Each key is a pipeline instance defined by the end user. The
  # InteractiveRunner or its enclosing scope is responsible for populating
  # this dictionary implicitly when a background caching jobs is started.
  self._background_caching_jobs = {}
  # Holds TestStreamServiceControllers that controls gRPC servers serving
  # events as test stream of TestStreamPayload.Event.
  # Dict[Pipeline, TestStreamServiceController]. Each key is a pipeline
  # instance defined by the end user. The InteractiveRunner or its enclosing
  # scope is responsible for populating this dictionary implicitly when a new
  # controller is created to start a new gRPC server. The server stays alive
  # until a new background caching job is started thus invalidating everything
  # the gRPC server serves.
  self._test_stream_service_controllers = {}
  self._cached_source_signature = {}
  self._tracked_user_pipelines = set()
  # Tracks the computation completeness of PCollections. PCollections tracked
  # here don't need to be re-computed when data introspection is needed.
  self._computed_pcolls = set()
  # Always watch __main__ module.
  self.watch('__main__')
  # Do a warning level logging if current python version is below 3.6.
  if sys.version_info < (3, 6):
    self._is_py_version_ready = False
    # Message kept consistent with the (3, 6) check above.
    _LOGGER.warning('Interactive Beam requires Python 3.6+.')
  else:
    self._is_py_version_ready = True
  # Check if [interactive] dependencies are installed.
  try:
    import IPython  # pylint: disable=unused-import
    import timeloop  # pylint: disable=unused-import
    from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator  # pylint: disable=unused-import
    self._is_interactive_ready = True
  except ImportError:
    self._is_interactive_ready = False
    _LOGGER.warning(
        'Dependencies required for Interactive Beam PCollection '
        'visualization are not available, please use: `pip '
        'install apache-beam[interactive]` to install necessary '
        'dependencies to enable all data visualization features.')
  self._is_in_ipython = is_in_ipython()
  self._is_in_notebook = is_in_notebook()
  if not self._is_in_ipython:
    _LOGGER.warning(
        'You cannot use Interactive Beam features when you are '
        'not in an interactive environment such as a Jupyter '
        'notebook or ipython terminal.')
  if self._is_in_ipython and not self._is_in_notebook:
    # Fixed grammar: "connected to any" (was "connected any").
    _LOGGER.warning(
        'You have limited Interactive Beam features since your '
        'ipython kernel is not connected to any notebook frontend.')
  if self._is_in_notebook:
    # Load frontend assets only when a notebook frontend is attached.
    self.load_jquery_with_datatable()
    self.import_html_to_head([
        'https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist'
        '/facets-jupyter.html'
    ])
  register_ipython_log_handler()
def __init__(self):
  """Initializes per-process interactive state keyed by user pipeline id."""
  # Registers a cleanup routine when system exits.
  atexit.register(self.cleanup)
  # Holds cache managers that manage source recording and intermediate
  # PCollection cache for each pipeline. Each key is a stringified user
  # defined pipeline instance's id.
  self._cache_managers = {}
  # Holds RecordingManagers keyed by pipeline instance id.
  self._recording_managers = {}
  # Holds class instances, module object, string of module names.
  self._watching_set = set()
  # Holds variables list of (Dict[str, object]).
  self._watching_dict_list = []
  # Holds results of main jobs as Dict[str, PipelineResult].
  # Each key is a pipeline instance defined by the end user. The
  # InteractiveRunner is responsible for populating this dictionary
  # implicitly.
  self._main_pipeline_results = {}
  # Holds background caching jobs as Dict[str, BackgroundCachingJob].
  # Each key is a pipeline instance defined by the end user. The
  # InteractiveRunner or its enclosing scope is responsible for populating
  # this dictionary implicitly when a background caching jobs is started.
  self._background_caching_jobs = {}
  # Holds TestStreamServiceControllers that controls gRPC servers serving
  # events as test stream of TestStreamPayload.Event.
  # Dict[str, TestStreamServiceController]. Each key is a pipeline
  # instance defined by the end user. The InteractiveRunner or its enclosing
  # scope is responsible for populating this dictionary implicitly when a new
  # controller is created to start a new gRPC server. The server stays alive
  # until a new background caching job is started thus invalidating everything
  # the gRPC server serves.
  self._test_stream_service_controllers = {}
  self._cached_source_signature = {}
  self._tracked_user_pipelines = UserPipelineTracker()
  # Deferred import to avoid a circular dependency at module load time.
  from apache_beam.runners.interactive.interactive_beam import clusters
  self.clusters = clusters
  # Tracks the computation completeness of PCollections. PCollections tracked
  # here don't need to be re-computed when data introspection is needed.
  self._computed_pcolls = set()
  # Always watch __main__ module.
  self.watch('__main__')
  # Check if [interactive] dependencies are installed.
  try:
    import IPython  # pylint: disable=unused-import
    import timeloop  # pylint: disable=unused-import
    from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator  # pylint: disable=unused-import
    from google.cloud import dataproc_v1  # pylint: disable=unused-import
    self._is_interactive_ready = True
  except ImportError:
    self._is_interactive_ready = False
    _LOGGER.warning(
        'Dependencies required for Interactive Beam PCollection '
        'visualization are not available, please use: `pip '
        'install apache-beam[interactive]` to install necessary '
        'dependencies to enable all data visualization features.')
  self._is_in_ipython = is_in_ipython()
  self._is_in_notebook = is_in_notebook()
  if not self._is_in_ipython:
    _LOGGER.warning(
        'You cannot use Interactive Beam features when you are '
        'not in an interactive environment such as a Jupyter '
        'notebook or ipython terminal.')
  if self._is_in_ipython and not self._is_in_notebook:
    _LOGGER.warning(
        'You have limited Interactive Beam features since your '
        'ipython kernel is not connected to any notebook frontend.')
  if self._is_in_notebook:
    # Only load frontend assets when a notebook frontend is attached.
    self.load_jquery_with_datatable()
    register_ipython_log_handler()
  # A singleton inspector instance to message information of current
  # environment to other applications.
  self._inspector = InteractiveEnvironmentInspector()
  # A similar singleton inspector except it includes synthetic variables
  # generated by Interactive Beam.
  self._inspector_with_synthetic = InteractiveEnvironmentInspector(
      ignore_synthetic=False)
  self.sql_chain = {}
def test_is_not_ipython_when_ipython_errors_out(self, corrupted):
  """Detection reports non-IPython when the (mocked) IPython errors out."""
  detected = is_in_ipython()
  self.assertFalse(detected)
def test_is_not_in_ipython_when_no_ipython_dep(self, unavailable):
  """Detection reports non-IPython when the dependency is (mocked) absent."""
  detected = is_in_ipython()
  self.assertFalse(detected)
def test_is_in_ipython_when_in_ipython_kernel(self, kernel):
  """Detection reports IPython when a (mocked) ipython kernel is present."""
  detected = is_in_ipython()
  self.assertTrue(detected)