def warn_or_error(msg, node=None, log_fmt=None):
    """Raise a compiler error for ``msg`` or log it as a warning.

    :param msg: The message to raise or log.
    :param node: Passed through to ``raise_compiler_error`` when erroring.
    :param log_fmt: Optional format string; when given, the logged text is
        ``log_fmt.format(msg)``. It is not applied on the error path.
    """
    if dbt.flags.WARN_ERROR:
        raise_compiler_error(msg, node)
        # Nothing is logged on the error path, even if the call above
        # were to return.
        return

    text = msg if log_fmt is None else log_fmt.format(msg)
    logger.warning(text)
def main(args=None):
    """Run ``handle_and_check`` on ``args`` and exit with the resulting code.

    :param args: Argument list; defaults to ``sys.argv[1:]`` when ``None``.

    The process always terminates through ``sys.exit``. The exit code is:

    * ``ExitCodes.Success`` / ``ExitCodes.ModelError`` depending on the
      ``succeeded`` flag returned by ``handle_and_check``;
    * the ``code`` of a ``SystemExit`` raised while handling;
    * ``ExitCodes.UnhandledError`` for ``KeyboardInterrupt`` and for any
      other exception.
    """
    argv = sys.argv[1:] if args is None else args

    with log_manager.applicationbound():
        try:
            _results, succeeded = handle_and_check(argv)
            outcome = ExitCodes.Success if succeeded else ExitCodes.ModelError
            exit_code = outcome.value

        except KeyboardInterrupt:
            logger.info("ctrl-c")
            exit_code = ExitCodes.UnhandledError.value

        # This can be thrown by eg. argparse
        except SystemExit as exit_request:
            exit_code = exit_request.code

        except BaseException as err:
            for line in ("Encountered an error:", str(err)):
                logger.warning(line)

            if log_manager.initialized:
                logger.debug(traceback.format_exc())
            elif not isinstance(err, RuntimeException):
                # if it did not come from dbt proper and the logger is not
                # initialized (so there's no safe path to log to), log the
                # stack trace at error level.
                logger.error(traceback.format_exc())

            exit_code = ExitCodes.UnhandledError.value

    sys.exit(exit_code)
def _iterate_selected_nodes(self):
    """Yield the manifest node for each selected id, in sorted id order.

    Logs a warning and yields nothing when ``self.select_nodes()`` is
    empty. Each id is looked up in ``self.manifest.nodes`` as it is
    consumed.
    """
    selected_ids = sorted(self.select_nodes())
    if len(selected_ids) == 0:
        logger.warning('No nodes selected!')
        return

    yield from (self.manifest.nodes[uid] for uid in selected_ids)
def run(self):
    """
    Run dbt for the query, based on the graph.

    After ``_runtime_initialize``:

    * raises ``InternalException`` if ``_flattened_nodes`` is still ``None``;
    * when no nodes were selected, logs a warning and returns an empty
      result built by ``get_result``;
    * otherwise executes the selected unique ids through
      ``execute_with_hooks``, writes the manifest and result when
      ``flags.WRITE_JSON`` is set, reports the end-of-task messages and
      returns the result.
    """
    self._runtime_initialize()

    if self._flattened_nodes is None:
        raise InternalException(
            'after _runtime_initialize, _flattened_nodes was still None'
        )

    if len(self._flattened_nodes) == 0:
        nothing_to_do = (
            "WARNING: Nothing to do. Try checking your model "
            "configs and model specification args"
        )
        logger.warning(nothing_to_do)
        return self.get_result(
            results=[],
            generated_at=datetime.utcnow(),
            elapsed_time=0.0,
        )

    # Emit a blank separator line before execution output.
    with TextOnly():
        logger.info("")

    selected_uids = frozenset(
        node.unique_id for node in self._flattened_nodes
    )
    outcome = self.execute_with_hooks(selected_uids)

    if flags.WRITE_JSON:
        self.write_manifest()
        self.write_result(outcome)

    self.task_end_messages(outcome.results)
    return outcome
def warn_or_error(msg, node=None, log_fmt=None):
    """Log ``msg`` as a warning, or raise it as a compiler error.

    The choice is driven by ``dbt.flags.WARN_ERROR``: when it is truthy
    ``raise_compiler_error(msg, node)`` is called; otherwise the message is
    sent to ``logger.warning``.

    :param msg: The message text.
    :param node: Forwarded to ``raise_compiler_error`` on the error path.
    :param log_fmt: Optional format string applied to ``msg`` (via
        ``str.format``) on the warning path only.
    """
    if not dbt.flags.WARN_ERROR:
        formatted = msg
        if log_fmt is not None:
            formatted = log_fmt.format(msg)
        logger.warning(formatted)
    else:
        raise_compiler_error(msg, node)
def parse_v1_test_yml(cls, original_file_path, test_yml, package_name,
                      root_project, all_projects, root_dir, macros=None):
    """Parse v1 yml contents, yielding parsed nodes.

    A v1 yml file is laid out like this ('variables' written
    bash-curly-brace style):

    ${model_name}:
        constraints:
            ${constraint_type}:
                - ${column_1}
                - ${column_2}
            ${other_constraint_type}:
                - ...
    ${other_model_name}:
        constraints:
            ...

    Entries that are not dicts produce a compiler warning (or a compiler
    error when ``dbt.flags.STRICT_MODE`` is set) and are skipped. Entries
    without ``constraints`` are logged and skipped. Constraint values that
    are ``None`` are skipped silently; values that are not a list or tuple
    produce a compiler warning and are skipped.
    """
    # in v1 we can really only have constraints, so not having any is
    # a concern
    no_tests_warning = (
        "* WARNING: No constraints found for model '{}' in file {}\n"
    )
    invalid_spec_template = (
        "Invalid test config given in {} near {} (expected a dict)"
    )

    for model_name, test_spec in test_yml.items():
        if not isinstance(test_spec, dict):
            msg = invalid_spec_template.format(original_file_path, test_spec)
            if dbt.flags.STRICT_MODE:
                dbt.exceptions.raise_compiler_error(msg)
            dbt.utils.compiler_warning(model_name, msg,
                                       resource_type='test')
            continue

        # test_spec is known to be a dict from here on.
        constraints = test_spec.get('constraints')
        if constraints is None:
            logger.warning(
                no_tests_warning.format(model_name, original_file_path))
            continue

        for test_type, configs in constraints.items():
            if configs is None:
                continue

            if not isinstance(configs, (list, tuple)):
                dbt.utils.compiler_warning(
                    model_name,
                    "Invalid test config given in {}".format(
                        original_file_path)
                )
                continue

            for config in configs:
                test_args = cls._build_v1_test_args(config)
                parsed = cls.build_node(
                    model_name, package_name, test_type, test_args,
                    root_dir, original_file_path, root_project,
                    all_projects, macros)
                if parsed is not None:
                    yield parsed
def open(cls, connection):
    """Open a Spark connection for ``connection`` and attach its handle.

    An already-open connection is returned untouched. Otherwise up to
    ``1 + connect_retries`` attempts are made (``connect_retries`` and
    ``connect_timeout`` are read from the credentials, defaulting to 0 and
    10). An attempt is retried only when the raised exception has a
    ``message`` containing ``pending`` or ``temporarily_unavailable``.

    :param connection: The connection object; its ``credentials`` select
        the ``http`` or ``thrift`` method.
    :return: The same ``connection`` with ``state`` set to ``'open'`` and
        ``handle`` set to a ``ConnectionWrapper``.
    :raises Exception: Non-retryable errors are re-raised as-is; once all
        attempts are used up the last retryable error is raised.
    """
    if connection.state == 'open':
        logger.debug('Connection is already open, skipping open.')
        return connection

    creds = connection.credentials
    connect_retries = creds.get('connect_retries', 0)
    connect_timeout = creds.get('connect_timeout', 10)

    exc = None
    for i in range(1 + connect_retries):
        try:
            if creds.method == 'http':
                cls.validate_creds(
                    creds,
                    ['token', 'host', 'port', 'cluster', 'organization'])

                conn_url = SPARK_CONNECTION_URL.format(**creds)
                transport = THttpClient.THttpClient(conn_url)

                # HTTP basic auth with the literal user name "token".
                raw_token = "token:{}".format(creds.token).encode()
                token = base64.standard_b64encode(raw_token).decode()
                transport.setCustomHeaders(
                    {'Authorization': 'Basic {}'.format(token)})

                conn = hive.connect(thrift_transport=transport)
            elif creds.method == 'thrift':
                cls.validate_creds(creds, ['host'])

                conn = hive.connect(host=creds.host,
                                    port=creds.get('port'),
                                    username=creds.get('user'))
            # NOTE(review): any other ``creds.method`` leaves ``conn``
            # unassigned, so the wrapper construction below would fail.
            break
        except Exception as e:
            exc = e
            if getattr(e, 'message', None) is None:
                raise

            message = e.message.lower()
            is_pending = 'pending' in message
            is_starting = 'temporarily_unavailable' in message
            if not (is_pending or is_starting):
                raise

            # Only announce and wait for a retry when another attempt is
            # actually left; after the final attempt fall through to the
            # loop's ``else`` and raise immediately.
            if i < connect_retries:
                warning = "Warning: {}\n\tRetrying in {} seconds ({} of {})"
                logger.warning(
                    warning.format(e.message, connect_timeout,
                                   i + 1, connect_retries))
                time.sleep(connect_timeout)
    else:
        # Every attempt failed with a retryable error.
        raise exc

    wrapped = ConnectionWrapper(conn)

    connection.state = 'open'
    connection.handle = wrapped
    return connection
def warn_or_raise(exc, log_fmt=None):
    """Raise ``exc``, or log its text as a warning.

    :param exc: The exception to raise when ``dbt.flags.WARN_ERROR`` is
        truthy; otherwise ``str(exc)`` is logged via ``logger.warning``.
    :param log_fmt: Optional format string applied to the text (via
        ``str.format``) before logging.
    """
    if dbt.flags.WARN_ERROR:
        raise exc

    text = str(exc)
    logger.warning(text if log_fmt is None else log_fmt.format(text))
def parse_schema_tests(cls, tests, root_project, projects, macros=None):
    """Parse schema test files into a dict of parsed tests.

    :param tests: Iterable of mappings; ``raw_yml``, ``package_name``,
        ``path`` and ``original_file_path`` are read from each.
    :param root_project: Forwarded to ``get_parsed_schema_test``.
    :param projects: Forwarded to ``get_parsed_schema_test``.
    :param macros: Forwarded to ``get_parsed_schema_test``.
    :return: Dict keyed by each parsed test's ``unique_id``.

    Files that fail YAML validation are logged at info level and skipped,
    as are files whose loaded contents are ``None``. Models without
    ``constraints`` are logged as warnings and skipped.
    """
    no_tests_warning = ("* WARNING: No constraints found for model"
                        " '{}' in file {}\n")
    parsed_by_id = {}

    for test in tests:
        raw_yml = test.get('raw_yml')
        test_name = "{}:{}".format(test.get('package_name'),
                                   test.get('path'))

        try:
            test_yml = dbt.clients.yaml_helper.load_yaml_text(raw_yml)
        except dbt.exceptions.ValidationException as e:
            logger.info("Error reading {} - Skipping\n{}".format(
                test_name, e))
            continue

        if test_yml is None:
            continue

        for model_name, test_spec in test_yml.items():
            if test_spec is None or test_spec.get('constraints') is None:
                test_path = test.get('original_file_path', '<unknown>')
                logger.warning(
                    no_tests_warning.format(model_name, test_path))
                continue

            for test_type, configs in test_spec.get(
                    'constraints', {}).items():
                if configs is None:
                    continue

                if not isinstance(configs, (list, tuple)):
                    dbt.utils.compiler_warning(
                        model_name,
                        "Invalid test config given in {} near {}".format(
                            test.get('path'), configs))
                    continue

                for config in configs:
                    parsed = cls.get_parsed_schema_test(
                        test, test_type, model_name, config,
                        root_project, projects, macros)
                    if parsed is not None:
                        parsed_by_id[parsed.get('unique_id')] = parsed

    return parsed_by_id
def invalid_ref_fail_unless_test(node, target_model_name,
                                 target_model_package, disabled):
    """Handle an unresolved ref: log it for tests, fail for anything else.

    For nodes whose ``resource_type`` is ``NodeType.Test`` the message from
    ``invalid_ref_test_message`` is logged, at debug level when
    ``disabled`` is truthy and at warning level otherwise. For every other
    node ``dbt.exceptions.ref_target_not_found`` is called.
    """
    if node.get('resource_type') != NodeType.Test:
        dbt.exceptions.ref_target_not_found(
            node,
            target_model_name,
            target_model_package)
        return

    msg = invalid_ref_test_message(node, target_model_name,
                                   target_model_package, disabled)
    emit = logger.debug if disabled else logger.warning
    emit(msg)
def _iterate_selected_nodes(self):
    """Yield the manifest entry for each selected id, in sorted id order.

    Each id is looked up first in ``self.manifest.nodes`` and then in
    ``self.manifest.sources``. Logs a warning and yields nothing when the
    selection is empty.

    :raises InternalException: If nodes were selected but
        ``self.manifest`` is ``None``.
    :raises RuntimeException: If a selected id is neither a node nor a
        source in the manifest.
    """
    nodes = sorted(self.select_nodes())
    if not nodes:
        logger.warning('No nodes selected!')
        return

    if self.manifest is None:
        raise InternalException(
            'manifest is None in _iterate_selected_nodes')

    for node in nodes:
        if node in self.manifest.nodes:
            yield self.manifest.nodes[node]
        elif node in self.manifest.sources:
            yield self.manifest.sources[node]
        else:
            # The two message parts are separated explicitly; previously
            # they were joined with nothing between the closing quote and
            # "Expected".
            raise RuntimeException(
                f'Got an unexpected result from node selection: "{node}". '
                f'Expected a source or a node!')
def check_modified(
    self,
    old: Optional[SelectorTarget],
    new: SelectorTarget,
) -> bool:
    """Return True when ``new`` does not have the same contents as ``old``.

    On the first call (while ``self.macros_were_modified`` is ``None``) the
    result of ``self._macros_modified()`` is cached, and a warning listing
    the modified macros is logged if there are any.
    """
    # check if there are any changes in macros, if so, log a warning the
    # first time
    if self.macros_were_modified is None:
        self.macros_were_modified = self._macros_modified()
        if self.macros_were_modified:
            changed_macros = ', '.join(self.macros_were_modified)
            notice = (
                'During a state comparison, dbt detected a change in '
                'macros. This will not be marked as a modification. Some '
                f'macros: {changed_macros}'
            )
            logger.warning(warning_tag(notice))

    unchanged = new.same_contents(old)  # type: ignore
    return not unchanged
def run(self):
    """
    Run dbt for the query, based on the graph.

    Returns an empty list (after logging a warning) when no nodes were
    selected. Otherwise executes the selected unique ids through
    ``execute_with_hooks``, writes the result to ``self.result_path()``,
    reports the end-of-task messages and returns ``result.results``.
    """
    self._runtime_initialize()

    if len(self._flattened_nodes) == 0:
        nothing_to_do = (
            "WARNING: Nothing to do. Try checking your model "
            "configs and model specification args"
        )
        logger.warning(nothing_to_do)
        return []

    # Emit a blank separator line before execution output.
    logger.info("")

    selected_uids = frozenset(
        node.unique_id for node in self._flattened_nodes
    )
    outcome = self.execute_with_hooks(selected_uids)

    outcome.write(self.result_path())

    self.task_end_messages(outcome.results)
    return outcome.results
def handle_failure(num_ok, unsent):
    """Log a warning and disable tracking.

    Neither argument is used by the body.

    :param num_ok: Unused; see the note below.
    :param unsent: Unused; see the note below.
    """
    # num_ok will always be 0, unsent will always be 1 entry long, because
    # the buffer is length 1, so not much to talk about
    failure_notice = 'Error sending message, disabling tracking'
    logger.warning(failure_notice)
    do_not_track()
def open(cls, connection):
    """Open a Spark connection for ``connection`` and attach its handle.

    An already-open connection is returned untouched. Otherwise up to
    ``1 + creds.connect_retries`` attempts are made, waiting
    ``creds.connect_timeout`` seconds between attempts when
    ``_is_retryable_error`` returns a message for the failure.

    :param connection: The connection object; its ``credentials`` select
        the ``http`` or ``thrift`` method.
    :return: The same ``connection`` with ``handle`` set to a
        ``ConnectionWrapper`` and ``state`` set to ``ConnectionState.OPEN``.
    :raises dbt.exceptions.FailedToConnectException: On ``EOFError`` or on
        any non-retryable failure. This includes errors raised inside the
        attempt itself, such as the ``DbtProfileError`` for an unknown
        method, since they go through the same handler.
    :raises Exception: The last retryable error, unchanged, once all
        attempts are used up.
    """
    if connection.state == ConnectionState.OPEN:
        logger.debug('Connection is already open, skipping open.')
        return connection

    creds = connection.credentials
    exc = None

    for i in range(1 + creds.connect_retries):
        try:
            if creds.method == 'http':
                cls.validate_creds(
                    creds,
                    ['token', 'host', 'port', 'cluster', 'organization'])

                conn_url = cls.SPARK_CONNECTION_URL.format(
                    host=creds.host,
                    port=creds.port,
                    organization=creds.organization,
                    cluster=creds.cluster
                )
                logger.debug("connection url: {}".format(conn_url))

                transport = THttpClient.THttpClient(conn_url)

                # HTTP basic auth with the literal user name "token".
                raw_token = "token:{}".format(creds.token).encode()
                token = base64.standard_b64encode(raw_token).decode()
                transport.setCustomHeaders(
                    {'Authorization': 'Basic {}'.format(token)})

                conn = hive.connect(thrift_transport=transport)
            elif creds.method == 'thrift':
                cls.validate_creds(
                    creds, ['host', 'port', 'user', 'schema'])

                conn = hive.connect(host=creds.host,
                                    port=creds.port,
                                    username=creds.user)
            else:
                raise dbt.exceptions.DbtProfileError(
                    f"invalid credential method: {creds.method}")
            break
        except Exception as e:
            exc = e
            if isinstance(e, EOFError):
                # The user almost certainly has invalid credentials.
                # Perhaps a token expired, or something
                msg = 'Failed to connect'
                if creds.token is not None:
                    msg += ', is your token valid?'
                raise dbt.exceptions.FailedToConnectException(msg) from e

            retryable_message = _is_retryable_error(e)
            if not retryable_message:
                raise dbt.exceptions.FailedToConnectException(
                    'failed to connect') from e

            # Only announce and wait for a retry when another attempt is
            # actually left; after the final attempt fall through to the
            # loop's ``else`` and raise immediately. The counter is
            # 1-based: "1 of N" precedes the first retry.
            if i < creds.connect_retries:
                msg = (f"Warning: {retryable_message}\n\tRetrying in "
                       f"{creds.connect_timeout} seconds "
                       f"({i + 1} of {creds.connect_retries})")
                logger.warning(msg)
                time.sleep(creds.connect_timeout)
    else:
        # Every attempt failed with a retryable error.
        raise exc

    handle = ConnectionWrapper(conn)
    connection.handle = handle
    connection.state = ConnectionState.OPEN
    return connection
def reopen_conn_on_error(error):
    """Close and reopen the connection after a reopenable error.

    Does nothing unless ``error`` is an instance of ``REOPENABLE_ERRORS``.
    ``self`` and ``conn`` are not parameters; they come from the
    surrounding scope.
    """
    if not isinstance(error, REOPENABLE_ERRORS):
        return

    logger.warning('Reopening connection after {!r}', error)
    self.close(conn)
    self.open(conn)
def open(cls, connection):
    """Open a Spark connection for ``connection`` and attach its handle.

    An already-open connection is returned untouched. Otherwise up to
    ``1 + creds.connect_retries`` attempts are made, waiting
    ``creds.connect_timeout`` seconds between attempts. An attempt is
    retried only when the raised exception has a ``message`` containing
    ``pending`` or ``temporarily_unavailable``.

    :param connection: The connection object; its ``credentials`` select
        the ``http`` or ``thrift`` method.
    :return: The same ``connection`` with ``handle`` set to a
        ``ConnectionWrapper`` and ``state`` set to ``ConnectionState.OPEN``.
    :raises dbt.exceptions.FailedToConnectException: On any non-retryable
        failure. This includes errors raised inside the attempt itself,
        such as the ``DbtProfileError`` for an unknown method, since they
        go through the same handler.
    :raises Exception: The last retryable error, unchanged, once all
        attempts are used up.
    """
    if connection.state == ConnectionState.OPEN:
        logger.debug('Connection is already open, skipping open.')
        return connection

    creds = connection.credentials
    exc = None

    for i in range(1 + creds.connect_retries):
        try:
            if creds.method == 'http':
                cls.validate_creds(
                    creds,
                    ['token', 'host', 'port', 'cluster', 'organization'])

                conn_url = cls.SPARK_CONNECTION_URL.format(
                    host=creds.host,
                    port=creds.port,
                    organization=creds.organization,
                    cluster=creds.cluster
                )
                logger.debug("connection url: {}".format(conn_url))

                transport = THttpClient.THttpClient(conn_url)

                # HTTP basic auth with the literal user name "token".
                raw_token = "token:{}".format(creds.token).encode()
                token = base64.standard_b64encode(raw_token).decode()
                transport.setCustomHeaders(
                    {'Authorization': 'Basic {}'.format(token)})

                conn = hive.connect(thrift_transport=transport)
            elif creds.method == 'thrift':
                cls.validate_creds(
                    creds, ['host', 'port', 'user', 'schema'])

                conn = hive.connect(host=creds.host,
                                    port=creds.port,
                                    username=creds.user)
            else:
                raise dbt.exceptions.DbtProfileError(
                    f"invalid credential method: {creds.method}")
            break
        except Exception as e:
            exc = e
            if getattr(e, 'message', None) is None:
                raise dbt.exceptions.FailedToConnectException(str(e))

            message = e.message.lower()
            is_pending = 'pending' in message
            is_starting = 'temporarily_unavailable' in message
            if not (is_pending or is_starting):
                raise dbt.exceptions.FailedToConnectException(str(e))

            # Only announce and wait for a retry when another attempt is
            # actually left; after the final attempt fall through to the
            # loop's ``else`` and raise immediately. The counter is
            # 1-based: "1 of N" precedes the first retry.
            if i < creds.connect_retries:
                warning = "Warning: {}\n\tRetrying in {} seconds ({} of {})"
                msg = warning.format(e.message, creds.connect_timeout,
                                     i + 1, creds.connect_retries)
                logger.warning(msg)
                time.sleep(creds.connect_timeout)
    else:
        # Every attempt failed with a retryable error.
        raise exc

    handle = ConnectionWrapper(conn)
    connection.handle = handle
    connection.state = ConnectionState.OPEN
    return connection
def open(cls, connection):
    """Open a Spark connection for ``connection`` and attach its handle.

    An already-open connection is returned untouched. Otherwise up to
    ``1 + creds.connect_retries`` attempts are made, waiting
    ``creds.connect_timeout`` seconds between attempts when
    ``_is_retryable_error`` returns a message for the failure and retries
    are enabled.

    Supported ``creds.method`` values are ``SparkConnectionMethod.HTTP``,
    ``THRIFT`` (both via ``hive.connect``) and ``ODBC`` (via
    ``pyodbc.connect``, against either a cluster or an endpoint).

    :param connection: The connection object carrying ``credentials``.
    :return: The same ``connection`` with ``handle`` set to the wrapper
        for the chosen method and ``state`` set to
        ``ConnectionState.OPEN``.
    :raises dbt.exceptions.FailedToConnectException: On ``EOFError`` or on
        any failure that is not retried. This includes errors raised
        inside the attempt itself, such as ``DbtProfileError``, since they
        go through the same handler.
    :raises Exception: The last retryable error, unchanged, once all
        attempts are used up.
    """
    if connection.state == ConnectionState.OPEN:
        logger.debug('Connection is already open, skipping open.')
        return connection

    creds = connection.credentials
    exc = None

    for i in range(1 + creds.connect_retries):
        try:
            if creds.method == SparkConnectionMethod.HTTP:
                cls.validate_creds(
                    creds,
                    ['token', 'host', 'port', 'cluster', 'organization'])

                conn_url = cls.SPARK_CONNECTION_URL.format(
                    host=creds.host,
                    port=creds.port,
                    organization=creds.organization,
                    cluster=creds.cluster
                )
                logger.debug("connection url: {}".format(conn_url))

                transport = THttpClient.THttpClient(conn_url)

                # HTTP basic auth with the literal user name "token".
                raw_token = "token:{}".format(creds.token).encode()
                token = base64.standard_b64encode(raw_token).decode()
                transport.setCustomHeaders(
                    {'Authorization': 'Basic {}'.format(token)})

                conn = hive.connect(thrift_transport=transport)
                handle = PyhiveConnectionWrapper(conn)
            elif creds.method == SparkConnectionMethod.THRIFT:
                cls.validate_creds(
                    creds, ['host', 'port', 'user', 'schema'])

                conn = hive.connect(
                    host=creds.host,
                    port=creds.port,
                    username=creds.user,
                    auth=creds.auth,
                    kerberos_service_name=creds.kerberos_service_name)
                handle = PyhiveConnectionWrapper(conn)
            elif creds.method == SparkConnectionMethod.ODBC:
                http_path = None
                if creds.cluster is not None:
                    required_fields = [
                        'driver', 'host', 'port', 'token',
                        'organization', 'cluster'
                    ]
                    http_path = cls.SPARK_CLUSTER_HTTP_PATH.format(
                        organization=creds.organization,
                        cluster=creds.cluster
                    )
                elif creds.endpoint is not None:
                    required_fields = [
                        'driver', 'host', 'port', 'token', 'endpoint'
                    ]
                    http_path = cls.SPARK_SQL_ENDPOINT_HTTP_PATH.format(
                        endpoint=creds.endpoint
                    )
                else:
                    raise dbt.exceptions.DbtProfileError(
                        "Either `cluster` or `endpoint` must set when"
                        " using the odbc method to connect to Spark"
                    )

                cls.validate_creds(creds, required_fields)

                dbt_spark_version = __version__.version
                user_agent_entry = f"fishtown-analytics-dbt-spark/{dbt_spark_version} (Databricks)"  # noqa

                # https://www.simba.com/products/Spark/doc/v2/ODBC_InstallGuide/unix/content/odbc/options/driver.htm
                connection_str = _build_odbc_connnection_string(
                    DRIVER=creds.driver,
                    HOST=creds.host,
                    PORT=creds.port,
                    UID="token",
                    PWD=creds.token,
                    HTTPPath=http_path,
                    AuthMech=3,
                    SparkServerType=3,
                    ThriftTransport=2,
                    SSL=1,
                    UserAgentEntry=user_agent_entry,
                )

                conn = pyodbc.connect(connection_str, autocommit=True)
                handle = PyodbcConnectionWrapper(conn)
            else:
                raise dbt.exceptions.DbtProfileError(
                    f"invalid credential method: {creds.method}")
            break
        except Exception as e:
            exc = e
            if isinstance(e, EOFError):
                # The user almost certainly has invalid credentials.
                # Perhaps a token expired, or something
                msg = 'Failed to connect'
                if creds.token is not None:
                    msg += ', is your token valid?'
                raise dbt.exceptions.FailedToConnectException(msg) from e

            retryable_message = _is_retryable_error(e)
            if not (retryable_message and creds.connect_retries > 0):
                raise dbt.exceptions.FailedToConnectException(
                    'failed to connect') from e

            # Only announce and wait for a retry when another attempt is
            # actually left; after the final attempt fall through to the
            # loop's ``else`` and raise immediately. The counter is
            # 1-based: "1 of N" precedes the first retry.
            if i < creds.connect_retries:
                msg = (f"Warning: {retryable_message}\n\tRetrying in "
                       f"{creds.connect_timeout} seconds "
                       f"({i + 1} of {creds.connect_retries})")
                logger.warning(msg)
                time.sleep(creds.connect_timeout)
    else:
        # Every attempt failed with a retryable error.
        raise exc

    connection.handle = handle
    connection.state = ConnectionState.OPEN
    return connection