def apply_sql(
    query: str,
    output_name: Optional[str],
    found: Dict[str, beam.PCollection]) -> Tuple[str, PValue]:
    """Applies a SqlTransform with the given sql and queried PCollections.

    On success the output is also published as an attribute of the __main__
    module under the output variable name and registered through ib.watch.

    Args:
      query: The SQL query executed in the magic.
      output_name: (optional) The output variable name in __main__ module.
      found: The PCollections with variable names found to be used in the
        query.

    Returns:
      A Tuple[str, PValue]. First str value is the output variable name in
      __main__ module (auto-generated if not provided). Second PValue is most
      likely a PCollection, depending on the query.

    Raises:
      Exception: Any error raised while applying the SqlTransform or while
        publishing its output is first reported through on_error and then
        re-raised, so the function never falls through and returns None in
        place of the documented tuple.
    """
    output_name = _generate_output_name(output_name, query, found)
    query, sql_source = _build_query_components(query, found)
    try:
        output = sql_source | SqlTransform(query)
        # Declare a variable with the output_name and output value in the
        # __main__ module so that the user can use the output smoothly.
        setattr(importlib.import_module('__main__'), output_name, output)
        ib.watch({output_name: output})
        _LOGGER.info(
            "The output PCollection variable is %s with element_type %s",
            output_name,
            pformat_namedtuple(output.element_type))
        return output_name, output
    except (KeyboardInterrupt, SystemExit):
        # Interrupts and interpreter shutdown are passed through unreported.
        raise
    except Exception as e:
        on_error('Error when applying the Beam SQL: %s', e)
        # Propagate the failure after reporting it: without this the function
        # would implicitly return None, and a caller unpacking the result
        # would fail with an unrelated error that hides the real cause.
        raise
def test_pformat_namedtuple_with_unnamed_fields(self):
    # Checks the rendering that pformat_namedtuple produces for
    # OptionalUnionType against the accepted spellings below.
    # Parameters of an Union type can be in any order.
    accepted_renderings = (
        'OptionalUnionType(unnamed: typing.Union[int, str, NoneType])',
        'OptionalUnionType(unnamed: typing.Union[str, int, NoneType])')
    rendered = pformat_namedtuple(OptionalUnionType)
    self.assertIn(rendered, accepted_renderings)
def apply_sql(
    query: str,
    output_name: Optional[str],
    found: Dict[str, beam.PCollection],
    run: bool = True) -> Tuple[str, Union[PValue, SqlNode], SqlChain]:
    """Applies a SqlTransform with the given sql and queried PCollections.

    Args:
      query: The SQL query executed in the magic.
      output_name: (optional) The output variable name in __main__ module.
      found: The PCollections with variable names found to be used in the
        query.
      run: Whether to prepare the SQL pipeline for a local run or not.

    Returns:
      A tuple of three values:
        1. The output variable name in __main__ module, auto-generated if not
           provided.
        2. If run, a PValue; otherwise, a SqlNode (the chain's current node)
           that tracks the SQL without applying it or executing it.
        3. The SqlChain, a chain of SqlNodes that have been applied.

    Raises:
      Any exception raised while applying the SqlTransform (run=True only).
      It is reported through on_error with the formatted traceback and then
      re-raised; KeyboardInterrupt and SystemExit are re-raised unreported.
    """
    var_name = _generate_output_name(output_name, query, found)
    rewritten_query, sql_source, chain = _build_query_components(
        query, found, var_name, run)

    if not run:
        # Nothing is applied in this mode: hand back the chain's current node.
        return var_name, chain.current, chain

    try:
        applied = sql_source | SqlTransform(rewritten_query)
        # Declare a variable with the output name and output value in the
        # __main__ module so that the user can use the output smoothly.
        var_name, applied = create_var_in_main(var_name, applied)
        _LOGGER.info(
            "The output PCollection variable is %s with element_type %s",
            var_name,
            pformat_namedtuple(applied.element_type))
        return var_name, applied, chain
    except (KeyboardInterrupt, SystemExit):
        raise
    except:  # pylint: disable=bare-except
        on_error(
            'Error when applying the Beam SQL: %s', traceback.format_exc())
        raise
def test_pformat_namedtuple(self):
    # pformat_namedtuple(ANamedTuple) must produce exactly this rendering.
    self.assertEqual(
        'ANamedTuple(a: int, b: str)', pformat_namedtuple(ANamedTuple))
def test_pformat_namedtuple(self):
    # Compare the rendering of ANamedTuple against the exact expected text.
    expected = 'ANamedTuple(a: int, b: str)'
    rendered = pformat_namedtuple(ANamedTuple)
    self.assertEqual(expected, rendered)