示例#1
0
def warn_or_error(msg, node=None, log_fmt=None):
    """Raise a compiler error for ``msg`` or log it as a warning.

    When ``dbt.flags.WARN_ERROR`` is truthy, ``raise_compiler_error`` is
    called with ``msg`` and ``node``. Otherwise ``msg`` is logged at warning
    level, first passed through ``log_fmt.format`` when ``log_fmt`` is given.
    """
    if not dbt.flags.WARN_ERROR:
        text = msg if log_fmt is None else log_fmt.format(msg)
        logger.warning(text)
    else:
        raise_compiler_error(msg, node)
示例#2
0
def main(args=None):
    """Run ``handle_and_check`` and exit the process with its status.

    ``args`` defaults to ``sys.argv[1:]``. The exit code is chosen from
    ``ExitCodes`` according to the outcome, except for ``SystemExit``, whose
    own ``code`` is propagated. The function always ends in ``sys.exit``.
    """
    if args is None:
        args = sys.argv[1:]

    with log_manager.applicationbound():
        try:
            _results, succeeded = handle_and_check(args)
            exit_code = (
                ExitCodes.Success.value
                if succeeded
                else ExitCodes.ModelError.value
            )
        except KeyboardInterrupt:
            logger.info("ctrl-c")
            exit_code = ExitCodes.UnhandledError.value
        except SystemExit as exit_request:
            # This can be thrown by eg. argparse
            exit_code = exit_request.code
        except BaseException as err:
            logger.warning("Encountered an error:")
            logger.warning(str(err))

            if log_manager.initialized:
                logger.debug(traceback.format_exc())
            elif not isinstance(err, RuntimeException):
                # The error is not a RuntimeException and the logger is not
                # initialized (so there's no safe path to log to): emit the
                # stack trace at error level instead of debug.
                logger.error(traceback.format_exc())
            exit_code = ExitCodes.UnhandledError.value

    sys.exit(exit_code)
示例#3
0
 def _iterate_selected_nodes(self):
     nodes = sorted(self.select_nodes())
     if not nodes:
         logger.warning('No nodes selected!')
         return
     for node in nodes:
         yield self.manifest.nodes[node]
示例#4
0
    def run(self):
        """
        Run dbt for the query, based on the graph.

        Returns the object produced by ``execute_with_hooks``, or an empty
        result built by ``get_result`` when no nodes were selected. Raises
        ``InternalException`` if ``_runtime_initialize`` left
        ``_flattened_nodes`` unset.
        """
        self._runtime_initialize()

        flattened = self._flattened_nodes
        if flattened is None:
            raise InternalException(
                'after _runtime_initialize, _flattened_nodes was still None'
            )

        if len(flattened) == 0:
            logger.warning("WARNING: Nothing to do. Try checking your model "
                           "configs and model specification args")
            return self.get_result(
                results=[],
                generated_at=datetime.utcnow(),
                elapsed_time=0.0,
            )

        with TextOnly():
            logger.info("")

        selected_uids = frozenset(node.unique_id for node in flattened)
        outcome = self.execute_with_hooks(selected_uids)

        # Artifacts are only written when JSON output is enabled.
        if flags.WRITE_JSON:
            self.write_manifest()
            self.write_result(outcome)

        self.task_end_messages(outcome.results)
        return outcome
示例#5
0
def warn_or_error(msg, node=None, log_fmt=None):
    """Report ``msg`` as a compiler error or as a logged warning.

    ``dbt.flags.WARN_ERROR`` selects the behavior: when truthy the message
    (with ``node``) goes to ``raise_compiler_error``; otherwise it is logged
    at warning level, wrapped with ``log_fmt.format`` if ``log_fmt`` is set.
    """
    if dbt.flags.WARN_ERROR:
        raise_compiler_error(msg, node)
        return

    rendered = msg
    if log_fmt is not None:
        rendered = log_fmt.format(rendered)
    logger.warning(rendered)
示例#6
0
    def parse_v1_test_yml(cls, original_file_path, test_yml, package_name,
                          root_project, all_projects, root_dir, macros=None):
        """Parse v1 yml contents, yielding parsed nodes.

        A v1 yml file is laid out like this ('variables' written
        bash-curly-brace style):

            ${model_name}:
                constraints:
                    ${constraint_type}:
                        - ${column_1}
                        - ${column_2}
                    ${other_constraint_type}:
                        - ...
            ${other_model_name}:
                constraints:
                    ...

        Entries that are not dicts, have no constraints, or whose constraint
        configs are not a list/tuple are reported and skipped. Every
        non-None result of ``cls.build_node`` is yielded.
        """
        # in v1 we can really only have constraints, so not having any is
        # a concern
        missing_constraints_fmt = (
            "* WARNING: No constraints found for model '{}' in file {}\n"
        )

        for model_name, test_spec in test_yml.items():
            if not isinstance(test_spec, dict):
                msg = (
                    "Invalid test config given in {} near {} (expected a dict)"
                ).format(original_file_path, test_spec)
                if dbt.flags.STRICT_MODE:
                    dbt.exceptions.raise_compiler_error(msg)
                dbt.utils.compiler_warning(model_name, msg,
                                           resource_type='test')
                continue

            # test_spec is known to be a dict from here on.
            constraints = test_spec.get('constraints')
            if constraints is None:
                logger.warning(
                    missing_constraints_fmt.format(model_name,
                                                   original_file_path))
                continue

            for test_type, configs in constraints.items():
                if configs is None:
                    continue

                if isinstance(configs, (list, tuple)):
                    for config in configs:
                        test_args = cls._build_v1_test_args(config)
                        parsed = cls.build_node(
                            model_name, package_name, test_type, test_args,
                            root_dir, original_file_path,
                            root_project, all_projects, macros)
                        if parsed is not None:
                            yield parsed
                else:
                    dbt.utils.compiler_warning(
                        model_name,
                        "Invalid test config given in {}".format(
                            original_file_path)
                    )
示例#7
0
    def open(cls, connection):
        """Open ``connection`` and return it, retrying selected failures.

        A connection whose state is already ``'open'`` is returned
        untouched. Otherwise a handle is created with ``hive.connect``
        according to ``creds.method`` (``'http'`` or ``'thrift'``), wrapped
        in ``ConnectionWrapper`` and stored on the connection.

        Up to ``1 + connect_retries`` attempts are made (``connect_retries``
        defaults to 0, ``connect_timeout`` to 10 seconds). An attempt is
        retried only when the raised exception has a ``message`` containing
        ``'pending'`` or ``'temporarily_unavailable'``; any other exception
        propagates unchanged. When the attempts are used up, the last
        exception is re-raised.
        """
        if connection.state == 'open':
            logger.debug('Connection is already open, skipping open.')
            return connection

        creds = connection.credentials
        connect_retries = creds.get('connect_retries', 0)
        connect_timeout = creds.get('connect_timeout', 10)

        exc = None
        for i in range(1 + connect_retries):
            try:
                if creds.method == 'http':

                    cls.validate_creds(
                        creds,
                        ['token', 'host', 'port', 'cluster', 'organization'])

                    conn_url = SPARK_CONNECTION_URL.format(**creds)
                    transport = THttpClient.THttpClient(conn_url)

                    # HTTP basic auth with the literal user "token".
                    raw_token = "token:{}".format(creds.token).encode()
                    token = base64.standard_b64encode(raw_token).decode()
                    transport.setCustomHeaders(
                        {'Authorization': 'Basic {}'.format(token)})

                    conn = hive.connect(thrift_transport=transport)
                elif creds.method == 'thrift':
                    cls.validate_creds(creds, ['host'])

                    conn = hive.connect(host=creds.host,
                                        port=creds.get('port'),
                                        username=creds.get('user'))
                # NOTE(review): any other method leaves `conn` unbound and
                # fails below with a NameError-style error - confirm whether
                # validation elsewhere rules that out.
                break
            except Exception as e:
                exc = e
                if getattr(e, 'message', None) is None:
                    raise

                message = e.message.lower()
                is_pending = 'pending' in message
                is_starting = 'temporarily_unavailable' in message

                # Only wait when another attempt will actually follow.
                # Previously the final attempt still logged "Retrying" and
                # slept for connect_timeout before the same exception was
                # raised anyway.
                if (is_pending or is_starting) and i < connect_retries:
                    warning = (
                        "Warning: {}\n\tRetrying in {} seconds ({} of {})"
                    )
                    logger.warning(
                        warning.format(e.message, connect_timeout, i + 1,
                                       connect_retries))
                    time.sleep(connect_timeout)
                else:
                    raise
        else:
            # Only reachable when the loop body never ran.
            raise exc

        wrapped = ConnectionWrapper(conn)

        connection.state = 'open'
        connection.handle = wrapped
        return connection
示例#8
0
def warn_or_raise(exc, log_fmt=None):
    """Raise ``exc`` or log its text as a warning.

    ``exc`` is raised when ``dbt.flags.WARN_ERROR`` is truthy. Otherwise
    ``str(exc)`` is logged at warning level, passed through
    ``log_fmt.format`` first when ``log_fmt`` is provided.
    """
    if dbt.flags.WARN_ERROR:
        raise exc

    text = str(exc)
    logger.warning(text if log_fmt is None else log_fmt.format(text))
示例#9
0
    def parse_schema_tests(cls, tests, root_project, projects, macros=None):
        """Parse schema test files into a dict keyed by ``unique_id``.

        Each item of ``tests`` supplies its yml text under ``'raw_yml'``.
        Files that fail to load are logged and skipped. For every model
        entry, each config listed under ``constraints`` is handed to
        ``cls.get_parsed_schema_test``; non-None results are collected.
        """
        no_tests_warning = ("* WARNING: No constraints found for model"
                            " '{}' in file {}\n")
        parsed_tests = {}

        for test in tests:
            raw_yml = test.get('raw_yml')
            test_name = "{}:{}".format(test.get('package_name'),
                                       test.get('path'))

            try:
                test_yml = dbt.clients.yaml_helper.load_yaml_text(raw_yml)
            except dbt.exceptions.ValidationException as e:
                logger.info("Error reading {} - Skipping\n{}".format(
                            test_name, e))
                continue

            if test_yml is None:
                continue

            for model_name, test_spec in test_yml.items():
                constraints = (
                    None if test_spec is None
                    else test_spec.get('constraints')
                )
                if constraints is None:
                    test_path = test.get('original_file_path', '<unknown>')
                    logger.warning(no_tests_warning.format(model_name,
                                   test_path))
                    continue

                for test_type, configs in constraints.items():
                    if configs is None:
                        continue

                    if not isinstance(configs, (list, tuple)):
                        dbt.utils.compiler_warning(
                            model_name,
                            "Invalid test config given in {} near {}".format(
                                test.get('path'),
                                configs))
                        continue

                    for config in configs:
                        node = cls.get_parsed_schema_test(
                            test, test_type, model_name, config,
                            root_project, projects, macros)
                        if node is not None:
                            parsed_tests[node.get('unique_id')] = node

        return parsed_tests
示例#10
0
文件: utils.py 项目: massmutual/dbt
def invalid_ref_fail_unless_test(node, target_model_name,
                                 target_model_package, disabled):
    """Handle a ref whose target could not be resolved.

    For nodes whose ``resource_type`` equals ``NodeType.Test`` the message
    from ``invalid_ref_test_message`` is only logged: at debug level when
    ``disabled`` is truthy, at warning level otherwise. For every other
    node, ``dbt.exceptions.ref_target_not_found`` is called.
    """
    if node.get('resource_type') == NodeType.Test:
        msg = invalid_ref_test_message(node, target_model_name,
                                       target_model_package, disabled)
        emit = logger.debug if disabled else logger.warning
        emit(msg)
    else:
        dbt.exceptions.ref_target_not_found(
            node,
            target_model_name,
            target_model_package)
示例#11
0
 def _iterate_selected_nodes(self):
     nodes = sorted(self.select_nodes())
     if not nodes:
         logger.warning('No nodes selected!')
         return
     if self.manifest is None:
         raise InternalException(
             'manifest is None in _iterate_selected_nodes')
     for node in nodes:
         if node in self.manifest.nodes:
             yield self.manifest.nodes[node]
         elif node in self.manifest.sources:
             yield self.manifest.sources[node]
         else:
             raise RuntimeException(
                 f'Got an unexpected result from node selection: "{node}"'
                 f'Expected a source or a node!')
示例#12
0
    def check_modified(
        self,
        old: Optional[SelectorTarget],
        new: SelectorTarget,
    ) -> bool:
        """Return True when ``new.same_contents(old)`` is falsy.

        On the first call (while ``self.macros_were_modified`` is None) the
        result of ``self._macros_modified()`` is cached on the instance, and
        a warning listing the changed macros is logged if it is non-empty.
        Macro changes do not affect the return value.
        """
        if self.macros_were_modified is None:
            changed_macros = self._macros_modified()
            self.macros_were_modified = changed_macros
            if changed_macros:
                log_str = ', '.join(changed_macros)
                message = warning_tag(
                    'During a state comparison, dbt detected a change in '
                    'macros. This will not be marked as a modification. Some '
                    f'macros: {log_str}')
                logger.warning(message)

        unchanged = new.same_contents(old)  # type: ignore
        return not unchanged
示例#13
0
    def run(self):
        """
        Run dbt for the query, based on the graph.

        Returns the ``results`` of ``execute_with_hooks``, or an empty list
        when ``_flattened_nodes`` is empty after initialization.
        """
        self._runtime_initialize()

        if len(self._flattened_nodes) == 0:
            logger.warning("WARNING: Nothing to do. Try checking your model "
                           "configs and model specification args")
            return []

        logger.info("")

        selected_uids = frozenset(
            node.unique_id for node in self._flattened_nodes
        )
        outcome = self.execute_with_hooks(selected_uids)

        # Persist the run result before reporting on it.
        outcome.write(self.result_path())

        self.task_end_messages(outcome.results)
        return outcome.results
示例#14
0
 def handle_failure(num_ok, unsent):
     """Log a warning and call ``do_not_track()`` after a failed send.

     Neither ``num_ok`` nor ``unsent`` is read by this function.
     """
     # num_ok will always be 0, unsent will always be 1 entry long, because
     # the buffer is length 1, so not much to talk about
     logger.warning('Error sending message, disabling tracking')
     do_not_track()
示例#15
0
    def open(cls, connection):
        """Open ``connection`` and return it, retrying retryable failures.

        A connection already in ``ConnectionState.OPEN`` is returned
        untouched. Otherwise a handle is created with ``hive.connect``
        according to ``creds.method`` (``'http'`` or ``'thrift'``), wrapped
        in ``ConnectionWrapper`` and stored on the connection.

        Up to ``1 + creds.connect_retries`` attempts are made, waiting
        ``creds.connect_timeout`` seconds between them. Only errors for
        which ``_is_retryable_error`` returns a truthy message are retried.

        Raises:
            dbt.exceptions.FailedToConnectException: on ``EOFError`` or any
                non-retryable error raised while connecting.
            Exception: the last retryable error, unchanged, once the
                attempts are used up.
        """
        if connection.state == ConnectionState.OPEN:
            logger.debug('Connection is already open, skipping open.')
            return connection

        creds = connection.credentials
        exc = None

        for i in range(1 + creds.connect_retries):
            try:
                if creds.method == 'http':
                    cls.validate_creds(
                        creds,
                        ['token', 'host', 'port', 'cluster', 'organization'])

                    conn_url = cls.SPARK_CONNECTION_URL.format(
                        host=creds.host,
                        port=creds.port,
                        organization=creds.organization,
                        cluster=creds.cluster)

                    logger.debug("connection url: {}".format(conn_url))

                    transport = THttpClient.THttpClient(conn_url)

                    # HTTP basic auth with the literal user "token".
                    raw_token = "token:{}".format(creds.token).encode()
                    token = base64.standard_b64encode(raw_token).decode()
                    transport.setCustomHeaders(
                        {'Authorization': 'Basic {}'.format(token)})

                    conn = hive.connect(thrift_transport=transport)
                elif creds.method == 'thrift':
                    cls.validate_creds(creds,
                                       ['host', 'port', 'user', 'schema'])

                    conn = hive.connect(host=creds.host,
                                        port=creds.port,
                                        username=creds.user)
                else:
                    # NOTE(review): raised inside the try, so the handler
                    # below also sees this error - confirm that is intended.
                    raise dbt.exceptions.DbtProfileError(
                        f"invalid credential method: {creds.method}")
                break
            except Exception as e:
                exc = e
                if isinstance(e, EOFError):
                    # The user almost certainly has invalid credentials.
                    # Perhaps a token expired, or something
                    msg = 'Failed to connect'
                    if creds.token is not None:
                        msg += ', is your token valid?'
                    raise dbt.exceptions.FailedToConnectException(msg) from e

                retryable_message = _is_retryable_error(e)
                if not retryable_message:
                    raise dbt.exceptions.FailedToConnectException(
                        'failed to connect') from e

                if i >= creds.connect_retries:
                    # No attempt is left: re-raise now instead of logging
                    # "Retrying" and sleeping before the same exception is
                    # raised anyway.
                    raise

                # i is zero-based; report retries starting at 1.
                msg = (f"Warning: {retryable_message}\n\tRetrying in "
                       f"{creds.connect_timeout} seconds "
                       f"({i + 1} of {creds.connect_retries})")
                logger.warning(msg)
                time.sleep(creds.connect_timeout)
        else:
            # Only reachable when the loop body never ran.
            raise exc

        handle = ConnectionWrapper(conn)
        connection.handle = handle
        connection.state = ConnectionState.OPEN
        return connection
示例#16
0
 def reopen_conn_on_error(error):
     """Close and reopen ``conn`` when ``error`` is a reopenable error.

     Errors that are not instances of ``REOPENABLE_ERRORS`` are ignored.
     ``self`` and ``conn`` are taken from the enclosing scope.
     """
     if not isinstance(error, REOPENABLE_ERRORS):
         return
     logger.warning('Reopening connection after {!r}', error)
     self.close(conn)
     self.open(conn)
示例#17
0
    def open(cls, connection):
        """Open ``connection`` and return it, retrying selected failures.

        A connection already in ``ConnectionState.OPEN`` is returned
        untouched. Otherwise a handle is created with ``hive.connect``
        according to ``creds.method`` (``'http'`` or ``'thrift'``), wrapped
        in ``ConnectionWrapper`` and stored on the connection.

        Up to ``1 + creds.connect_retries`` attempts are made, waiting
        ``creds.connect_timeout`` seconds between them. An attempt is
        retried only when the exception has a ``message`` containing
        ``'pending'`` or ``'temporarily_unavailable'``.

        Raises:
            dbt.exceptions.FailedToConnectException: for errors without a
                ``message`` or whose message is not retryable.
            Exception: the last retryable error, unchanged, once the
                attempts are used up.
        """
        if connection.state == ConnectionState.OPEN:
            logger.debug('Connection is already open, skipping open.')
            return connection

        creds = connection.credentials
        exc = None

        for i in range(1 + creds.connect_retries):
            try:
                if creds.method == 'http':
                    cls.validate_creds(
                        creds,
                        ['token', 'host', 'port', 'cluster', 'organization'])

                    conn_url = cls.SPARK_CONNECTION_URL.format(
                        host=creds.host,
                        port=creds.port,
                        organization=creds.organization,
                        cluster=creds.cluster)

                    logger.debug("connection url: {}".format(conn_url))

                    transport = THttpClient.THttpClient(conn_url)

                    # HTTP basic auth with the literal user "token".
                    raw_token = "token:{}".format(creds.token).encode()
                    token = base64.standard_b64encode(raw_token).decode()
                    transport.setCustomHeaders(
                        {'Authorization': 'Basic {}'.format(token)})

                    conn = hive.connect(thrift_transport=transport)
                elif creds.method == 'thrift':
                    cls.validate_creds(creds,
                                       ['host', 'port', 'user', 'schema'])

                    conn = hive.connect(host=creds.host,
                                        port=creds.port,
                                        username=creds.user)
                else:
                    raise dbt.exceptions.DbtProfileError(
                        f"invalid credential method: {creds.method}")
                break
            except Exception as e:
                exc = e
                if getattr(e, 'message', None) is None:
                    raise dbt.exceptions.FailedToConnectException(str(e))

                message = e.message.lower()
                is_pending = 'pending' in message
                is_starting = 'temporarily_unavailable' in message

                if not (is_pending or is_starting):
                    raise dbt.exceptions.FailedToConnectException(str(e))

                if i >= creds.connect_retries:
                    # No attempt is left: re-raise now instead of logging
                    # "Retrying" and sleeping before the same exception is
                    # raised anyway.
                    raise

                # i is zero-based; report retries starting at 1.
                warning = "Warning: {}\n\tRetrying in {} seconds ({} of {})"
                msg = warning.format(e.message, creds.connect_timeout, i + 1,
                                     creds.connect_retries)
                logger.warning(msg)
                time.sleep(creds.connect_timeout)
        else:
            # Only reachable when the loop body never ran.
            raise exc

        handle = ConnectionWrapper(conn)
        connection.handle = handle
        connection.state = ConnectionState.OPEN
        return connection
示例#18
0
    def open(cls, connection):
        """Open ``connection`` and return it, retrying retryable failures.

        A connection already in ``ConnectionState.OPEN`` is returned
        untouched. Otherwise a handle is created according to
        ``creds.method``:

        * ``SparkConnectionMethod.HTTP`` - ``hive.connect`` over a
          ``THttpClient`` transport, wrapped in ``PyhiveConnectionWrapper``.
        * ``SparkConnectionMethod.THRIFT`` - ``hive.connect`` by host/port,
          wrapped in ``PyhiveConnectionWrapper``.
        * ``SparkConnectionMethod.ODBC`` - ``pyodbc.connect`` against a
          cluster or SQL endpoint, wrapped in ``PyodbcConnectionWrapper``.

        Up to ``1 + creds.connect_retries`` attempts are made, waiting
        ``creds.connect_timeout`` seconds between them. Only errors for
        which ``_is_retryable_error`` returns a truthy message are retried.

        Raises:
            dbt.exceptions.FailedToConnectException: on ``EOFError``, on any
                non-retryable error, or on a retryable error when
                ``connect_retries`` is 0.
            Exception: the last retryable error, unchanged, once the
                attempts are used up.
        """
        if connection.state == ConnectionState.OPEN:
            logger.debug('Connection is already open, skipping open.')
            return connection

        creds = connection.credentials
        exc = None

        for i in range(1 + creds.connect_retries):
            try:
                if creds.method == SparkConnectionMethod.HTTP:
                    cls.validate_creds(
                        creds,
                        ['token', 'host', 'port', 'cluster', 'organization'])

                    conn_url = cls.SPARK_CONNECTION_URL.format(
                        host=creds.host,
                        port=creds.port,
                        organization=creds.organization,
                        cluster=creds.cluster)

                    logger.debug("connection url: {}".format(conn_url))

                    transport = THttpClient.THttpClient(conn_url)

                    # HTTP basic auth with the literal user "token".
                    raw_token = "token:{}".format(creds.token).encode()
                    token = base64.standard_b64encode(raw_token).decode()
                    transport.setCustomHeaders(
                        {'Authorization': 'Basic {}'.format(token)})

                    conn = hive.connect(thrift_transport=transport)
                    handle = PyhiveConnectionWrapper(conn)
                elif creds.method == SparkConnectionMethod.THRIFT:
                    cls.validate_creds(creds,
                                       ['host', 'port', 'user', 'schema'])

                    conn = hive.connect(
                        host=creds.host,
                        port=creds.port,
                        username=creds.user,
                        auth=creds.auth,
                        kerberos_service_name=creds.kerberos_service_name,
                    )
                    handle = PyhiveConnectionWrapper(conn)
                elif creds.method == SparkConnectionMethod.ODBC:
                    # The HTTP path and required fields depend on whether a
                    # cluster or a SQL endpoint is configured; cluster wins
                    # when both are set.
                    http_path = None
                    if creds.cluster is not None:
                        required_fields = [
                            'driver', 'host', 'port', 'token', 'organization',
                            'cluster'
                        ]
                        http_path = cls.SPARK_CLUSTER_HTTP_PATH.format(
                            organization=creds.organization,
                            cluster=creds.cluster)
                    elif creds.endpoint is not None:
                        required_fields = [
                            'driver', 'host', 'port', 'token', 'endpoint'
                        ]
                        http_path = cls.SPARK_SQL_ENDPOINT_HTTP_PATH.format(
                            endpoint=creds.endpoint)
                    else:
                        raise dbt.exceptions.DbtProfileError(
                            "Either `cluster` or `endpoint` must be set when"
                            " using the odbc method to connect to Spark")

                    cls.validate_creds(creds, required_fields)

                    dbt_spark_version = __version__.version
                    user_agent_entry = f"fishtown-analytics-dbt-spark/{dbt_spark_version} (Databricks)"  # noqa

                    # https://www.simba.com/products/Spark/doc/v2/ODBC_InstallGuide/unix/content/odbc/options/driver.htm
                    connection_str = _build_odbc_connnection_string(
                        DRIVER=creds.driver,
                        HOST=creds.host,
                        PORT=creds.port,
                        UID="token",
                        PWD=creds.token,
                        HTTPPath=http_path,
                        AuthMech=3,
                        SparkServerType=3,
                        ThriftTransport=2,
                        SSL=1,
                        UserAgentEntry=user_agent_entry,
                    )

                    conn = pyodbc.connect(connection_str, autocommit=True)
                    handle = PyodbcConnectionWrapper(conn)
                else:
                    raise dbt.exceptions.DbtProfileError(
                        f"invalid credential method: {creds.method}")
                break
            except Exception as e:
                exc = e
                if isinstance(e, EOFError):
                    # The user almost certainly has invalid credentials.
                    # Perhaps a token expired, or something
                    msg = 'Failed to connect'
                    if creds.token is not None:
                        msg += ', is your token valid?'
                    raise dbt.exceptions.FailedToConnectException(msg) from e
                retryable_message = _is_retryable_error(e)
                if retryable_message and creds.connect_retries > 0:
                    if i >= creds.connect_retries:
                        # No attempt is left: re-raise now instead of
                        # logging "Retrying" and sleeping before the same
                        # exception is raised anyway.
                        raise
                    # i is zero-based; report retries starting at 1.
                    msg = (f"Warning: {retryable_message}\n\tRetrying in "
                           f"{creds.connect_timeout} seconds "
                           f"({i + 1} of {creds.connect_retries})")
                    logger.warning(msg)
                    time.sleep(creds.connect_timeout)
                else:
                    raise dbt.exceptions.FailedToConnectException(
                        'failed to connect') from e
        else:
            # Only reachable when the loop body never ran.
            raise exc

        connection.handle = handle
        connection.state = ConnectionState.OPEN
        return connection