def patch_nodes(self, patches):
    """Apply each patch to the model node that shares its name.

    ``patches`` is a dict keyed by node name and is consumed: every entry
    whose name matches a model node is popped, so only unmatched patches
    remain in it afterwards.
    """
    # Patches are keyed by node *name*, which self.nodes is not, so walk the
    # nodes once and pop the matching patch rather than scanning all nodes
    # once per patch.
    for candidate in self.nodes.values():
        if candidate.resource_type == NodeType.Model:
            matched = patches.pop(candidate.name, None)
            if matched:
                candidate.patch(matched)

    # Anything left over never matched a model node. Patches are not nodes,
    # so the regular target_not_found warning cannot be reused here.
    template = (
        'WARNING: Found documentation for model "{}" which was '
        'not found or is disabled'
    )
    for leftover in patches.values():
        logger.debug(template.format(leftover.name))
def render_profile(cls, raw_profile, profile_name, target_override,
                   cli_vars):
    """Pick the target name, then render only that target's profile data.

    Rendering happens in two steps so that (1) the target name itself may
    come from environment/cli variables and (2) variables that are missing
    in profiles/targets which are not selected never cause an error.

    Returns a ``(target_name, profile_data)`` tuple.
    """
    renderer = ConfigRenderer(cli_vars=cli_vars)

    if target_override is None and 'target' not in raw_profile:
        # nothing was specified anywhere: fall back to the default target
        target_name = 'default'
        logger.debug(
            "target not specified in profile '{}', using '{}'"
            .format(profile_name, target_name)
        )
    elif target_override is None:
        # the target came from the parsed yaml, so it has to be rendered
        target_name = renderer.render_value(raw_profile['target'])
    else:
        target_name = target_override

    selected = cls._get_profile_data(raw_profile, profile_name, target_name)
    return target_name, renderer.render_profile_data(selected)
def gcloud_installed():
    """Return True if ``gcloud --version`` runs without an OSError.

    An OSError from ``run_cmd`` is logged at debug level and reported as
    False; any other exception propagates.
    """
    try:
        run_cmd('.', ['gcloud', '--version'])
    except OSError as exc:
        logger.debug(exc)
        return False
    else:
        return True
def handle_error(cls, error, message, sql):
    """Log the failed statement and re-raise as a DatabaseException.

    ``message`` is a format string with a ``{sql}`` placeholder. The raised
    exception text is the ``'message'`` entry of every item in
    ``error.errors``, one per line.
    """
    logger.debug(message.format(sql=sql))
    logger.debug(error)

    details = []
    for item in error.errors:
        details.append(item['message'])
    raise dbt.exceptions.DatabaseException("\n".join(details))
def run_sql(self, query, fetch='None', kwargs=None, connection_name=None):
    """Run ``query`` on the test connection and optionally fetch results.

    A blank query is a no-op. BigQuery and Presto are delegated to their
    dedicated helpers. Otherwise the statement is executed and committed;
    ``fetch='one'`` returns ``fetchone()``, ``fetch='all'`` returns
    ``fetchall()``, and anything else returns None. On any failure the
    transaction is rolled back, the query and error are printed, and the
    error is re-raised.
    """
    if connection_name is None:
        connection_name = '__test'

    if not query.strip():
        return

    sql = self.transform_sql(query, kwargs=kwargs)

    adapter_type = self.adapter_type
    if adapter_type == 'bigquery':
        return self.run_sql_bigquery(sql, fetch)
    if adapter_type == 'presto':
        return self.run_sql_presto(sql, fetch, connection_name)

    conn = self.adapter.acquire_connection(connection_name)
    with conn.handle.cursor() as cursor:
        logger.debug('test connection "{}" executing: {}'.format(
            connection_name, sql))
        try:
            cursor.execute(sql)
            conn.handle.commit()
            if fetch == 'one':
                return cursor.fetchone()
            if fetch == 'all':
                return cursor.fetchall()
            return
        except BaseException as e:
            conn.handle.rollback()
            print(query)
            print(e)
            raise e
        finally:
            # runs on every exit path, including the early returns above
            conn.transaction_open = False
def run_dbt_and_check(self, args=None):
    """Invoke dbt in strict mode and return ``dbt.handle_and_check``'s result.

    ``args`` defaults to ``["run"]``; ``--strict`` is always prepended.
    """
    requested = ["run"] if args is None else args
    full_args = ["--strict"] + requested
    logger.info("Invoking dbt with {}".format(full_args))
    return dbt.handle_and_check(full_args)
def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
                       branch=None):
    """Clone ``repo`` under ``cwd`` (or reuse an existing clone) and check
    out ``branch``.

    The directory name is parsed out of git's stderr: either from the
    "already exists" failure message or from the "Cloning into" line.
    Returns that directory name (relative to ``cwd``).
    """
    already_there = None
    try:
        _, err = clone(repo, cwd, dirname=dirname,
                       remove_git_dir=remove_git_dir)
    except dbt.exceptions.CommandResultError as exc:
        err = exc.stderr.decode('utf-8')
        already_there = re.match(
            "fatal: destination path '(.+)' already exists", err)
        if not already_there:
            # something else is wrong, raise it
            raise

    if already_there:
        directory = already_there.group(1)
        logger.debug('Updating existing dependency %s.', directory)
    else:
        # on this path err is the raw stderr returned by clone()
        cloned = re.match("Cloning into '(.+)'", err.decode('utf-8'))
        directory = cloned.group(1)
        logger.debug('Pulling new dependency %s.', directory)

    full_path = os.path.join(cwd, directory)
    start_sha = get_current_sha(full_path)
    checkout(full_path, repo, branch)
    end_sha = get_current_sha(full_path)

    if not already_there:
        logger.debug(' Checked out at %s.', end_sha[:7])
    elif start_sha == end_sha:
        logger.debug(' Already at %s, nothing to do.', start_sha[:7])
    else:
        logger.debug(' Updated checkout from %s to %s.',
                     start_sha[:7], end_sha[:7])
    return directory
def expand_column_types(self, goal, current, model_name=None):
    """Widen columns of ``current`` so they can hold the data in ``goal``.

    For every column name present in both relations, if the column in
    ``current`` reports ``can_expand_to`` the one in ``goal``, it is altered
    to a string type sized from the ``goal`` column. When ``model_name`` is
    None the 'master' connection is released afterwards.
    """
    def columns_by_name(relation):
        found = self.get_columns_in_relation(relation, model_name=model_name)
        return {col.name: col for col in found}

    reference_columns = columns_by_name(goal)
    target_columns = columns_by_name(current)

    for column_name, reference_column in reference_columns.items():
        target_column = target_columns.get(column_name)
        if target_column is None:
            continue
        if not target_column.can_expand_to(reference_column):
            continue

        new_type = self.Column.string_type(reference_column.string_size())
        logger.debug("Changing col type from %s to %s in table %s",
                     target_column.data_type, new_type, current)
        self.alter_column_type(current, column_name, new_type,
                               model_name=model_name)

    if model_name is None:
        self.release_connection('master')
def find_schema_yml(cls, package_name, root_dir, relative_dirs):
    """Yield ``(original_file_path, parsed_yaml)`` for each schema .yml file.

    Shared by v1 and v2: searches ``relative_dirs`` under ``root_dir`` for
    .yml files. Files that fail YAML validation are logged and skipped, and
    so are files that parse to None.
    """
    pattern = "[!.#~]*.yml"
    matches = dbt.clients.system.find_matching(
        root_dir, relative_dirs, pattern)

    for match in matches:
        contents = dbt.clients.system.load_file_contents(
            match.get('absolute_path'), strip=False)
        test_path = match.get('relative_path', '')
        original_file_path = os.path.join(match.get('searched_path'),
                                          test_path)

        try:
            parsed = dbt.clients.yaml_helper.load_yaml_text(contents)
        except dbt.exceptions.ValidationException as exc:
            logger.info("Error reading {}:{} - Skipping\n{}".format(
                package_name, test_path, exc))
            continue

        if parsed is None:
            continue

        yield original_file_path, parsed
def warn_or_error(msg, node=None, log_fmt=None):
    """Raise a compiler error or log a warning, depending on WARN_ERROR.

    When ``dbt.flags.WARN_ERROR`` is set, ``raise_compiler_error(msg, node)``
    is called. Otherwise ``msg`` is logged as a warning, first passed through
    ``log_fmt.format`` when a ``log_fmt`` is given.
    """
    if not dbt.flags.WARN_ERROR:
        text = msg if log_fmt is None else log_fmt.format(msg)
        logger.warning(text)
        return
    raise_compiler_error(msg, node)
def path_info(self):
    """Log PROFILE_DIR_MESSAGE filled in with the platform's open-directory
    command and this object's ``profiles_dir``."""
    opener = dbt.clients.system.open_dir_cmd()
    logger.info(PROFILE_DIR_MESSAGE.format(
        open_cmd=opener,
        profiles_dir=self.profiles_dir,
    ))
def initialize_tracking(cookie_dir):
    """Set the module-level ``active_user`` and initialize it.

    If initialization raises, the failure is logged at debug level (with
    traceback) and ``active_user`` is replaced by ``User(None)``.
    """
    global active_user

    user = User(cookie_dir)
    # publish the user before initializing it, as the original did
    active_user = user
    try:
        user.initialize()
    except Exception:
        logger.debug('Got an exception trying to initialize tracking',
                     exc_info=True)
        active_user = User(None)
def drop_schema(self, database, schema, model_name=None):
    """Drop ``schema`` in ``database`` by running the drop-schema macro.

    Both names are quoted via ``quote_as_configured`` before being passed
    to the macro; ``model_name`` is used as the connection name.
    """
    logger.debug('Dropping schema "%s"."%s".', database, schema)

    quoted_database = self.quote_as_configured(database, 'database')
    quoted_schema = self.quote_as_configured(schema, 'schema')
    macro_kwargs = {
        'database_name': quoted_database,
        'schema_name': quoted_schema,
    }
    self.execute_macro(DROP_SCHEMA_MACRO_NAME,
                       kwargs=macro_kwargs,
                       connection_name=model_name)
def compile_node(self, node, manifest, extra_context=None):
    """Render ``node``'s raw SQL, inject its CTEs and set ``wrapped_sql``.

    Returns the injected node produced by ``prepend_ctes``. ``extra_context``
    entries are layered on top of the generated runtime context.
    """
    extra_context = {} if extra_context is None else extra_context

    logger.debug("Compiling {}".format(node.get('unique_id')))

    # start from the parsed node and reset every compilation field
    node_data = node.to_dict()
    compile_defaults = {
        'compiled': False,
        'compiled_sql': None,
        'extra_ctes_injected': False,
        'extra_ctes': [],
        'injected_sql': None,
    }
    node_data.update(compile_defaults)
    compiled_node = CompiledNode(**node_data)

    context = dbt.context.runtime.generate(
        compiled_node, self.config, manifest)
    context.update(extra_context)

    compiled_node.compiled_sql = dbt.clients.jinja.get_rendered(
        node.get('raw_sql'), context, node)
    compiled_node.compiled = True

    injected_node, _ = prepend_ctes(compiled_node, manifest)

    if injected_node.resource_type in {NodeType.Test, NodeType.Operation}:
        # data tests get wrapped in count(*)
        # TODO : move this somewhere more reasonable
        is_data_test = (
            'data' in injected_node.tags and
            is_type(injected_node, NodeType.Test)
        )
        if is_data_test:
            wrapper = "select count(*) from (\n{test_sql}\n) sbq"
            injected_node.wrapped_sql = wrapper.format(
                test_sql=injected_node.injected_sql)
        else:
            # don't wrap schema tests or analyses.
            injected_node.wrapped_sql = injected_node.injected_sql
    else:
        # Archives are handled automagically elsewhere (it would be nice to
        # generate their SQL at the parser level), and ephemeral models are
        # left alone too; everything else gets wrapped_sql cleared.
        leave_untouched = (
            is_type(injected_node, NodeType.Archive) or
            (is_type(injected_node, NodeType.Model) and
             get_materialization(injected_node) == 'ephemeral')
        )
        if not leave_untouched:
            injected_node.wrapped_sql = None

    return injected_node
def print_end_of_run_summary(num_errors, early_exit=False):
    """Log a blank line followed by a colored one-line run summary.

    An early exit is reported in yellow (``num_errors`` is not consulted in
    that case), one or more errors in red, and success in green.
    """
    if not early_exit and num_errors > 0:
        summary = red('Completed with {} errors:'.format(num_errors))
    elif not early_exit:
        summary = green('Completed successfully')
    else:
        summary = yellow('Exited because of keyboard interrupt.')

    logger.info('')
    logger.info('{}'.format(summary))
def parse_macro_file(self, macro_file_path, macro_file_contents, root_path,
                     package_name, resource_type, tags=None, context=None):
    """Parse a macro file into a dict of ``unique_id -> ParsedMacro``.

    Only jinja macros whose name starts with ``dbt.utils.MACRO_PREFIX`` are
    considered, and only when ``resource_type`` is ``NodeType.Macro``. The
    macro's node name is its jinja name with the leading prefix removed.

    ``tags`` defaults to an empty list. ``context`` is accepted for interface
    compatibility and is not used.

    Raises ``dbt.exceptions.CompilationException`` (with ``node`` set to the
    unparsed file node) when the file's jinja cannot be parsed.
    """
    logger.debug("Parsing {}".format(macro_file_path))

    if tags is None:
        tags = []

    # change these to actual kwargs
    base_node = UnparsedMacro(
        path=macro_file_path,
        original_file_path=macro_file_path,
        package_name=package_name,
        raw_sql=macro_file_contents,
        root_path=root_path,
    )

    try:
        ast = dbt.clients.jinja.parse(macro_file_contents)
    except dbt.exceptions.CompilationException as e:
        e.node = base_node
        raise

    prefix = dbt.utils.MACRO_PREFIX
    to_return = {}

    for macro_node in ast.find_all(jinja2.nodes.Macro):
        macro_name = macro_node.name

        # Un-prefixed macros are never dbt macros. Skipping them explicitly
        # also avoids reusing a stale (or unbound) name from an earlier
        # iteration.
        if not macro_name.startswith(prefix):
            continue
        if NodeType.Macro != resource_type:
            continue

        # Strip only the leading prefix; str.replace would also remove any
        # later occurrence of the prefix inside the macro name.
        name = macro_name[len(prefix):]

        unique_id = self.get_path(resource_type, package_name, name)

        merged = dbt.utils.deep_merge(
            base_node.serialize(),
            {
                'name': name,
                'unique_id': unique_id,
                'tags': tags,
                'resource_type': resource_type,
                'depends_on': {'macros': []},
            })

        to_return[unique_id] = ParsedMacro(**merged)

    return to_return
def track(user, *args, **kwargs):
    """Send a structured tracking event unless ``user`` opted out.

    Sending is best-effort: any exception raised by the tracker is swallowed
    and only noted at debug level.
    """
    if user.do_not_track:
        return

    logger.debug("Sending event: {}".format(kwargs))
    try:
        tracker.track_struct_event(*args, **kwargs)
    except Exception:
        logger.debug(
            "An error was encountered while trying to send an event"
        )
def create_schema(self, database, schema, model_name=None):
    """Create ``schema`` in ``database`` via the create-schema macro.

    Runs on the connection named ``model_name`` ('master' when None) and
    then calls ``commit_if_has_connection`` for that connection name.
    """
    logger.debug('Creating schema "%s"."%s".', database, schema)

    connection_name = 'master' if model_name is None else model_name

    quoted_database = self.quote_as_configured(database, 'database')
    quoted_schema = self.quote_as_configured(schema, 'schema')
    macro_kwargs = {
        'database_name': quoted_database,
        'schema_name': quoted_schema,
    }
    self.execute_macro(CREATE_SCHEMA_MACRO_NAME,
                       kwargs=macro_kwargs,
                       connection_name=connection_name)
    self.commit_if_has_connection(connection_name)
def cancel(self, connection):
    """Terminate the backend process serving ``connection``.

    Issues ``pg_terminate_backend`` for the connection's backend pid on the
    'master' connection and logs the single row that comes back.
    """
    name = connection.name
    backend_pid = connection.handle.get_backend_pid()
    terminate_sql = "select pg_terminate_backend({})".format(backend_pid)

    logger.debug("Cancelling query '{}' ({})".format(name, backend_pid))

    _, cursor = self.add_query(terminate_sql, 'master')
    outcome = cursor.fetchone()

    logger.debug("Cancel query '{}': {}".format(name, outcome))
def _schema_is_cached(self, database, schema, model_name=None):
    """Return True if ``(database, schema)`` is in the cache.

    Returns False without logging when ``dbt.flags.USE_CACHE`` is False;
    a genuine cache miss is logged at debug level.
    """
    if dbt.flags.USE_CACHE is False:
        return False

    if (database, schema) in self.cache:
        return True

    logger.debug(
        'On "{}": cache miss for schema "{}.{}", this is inefficient'
        .format(model_name or '<None>', database, schema)
    )
    return False
def get_columns_in_relation(self, relation, model_name=None):
    """Return the dbt columns of the BigQuery table behind ``relation``.

    A ``ValueError`` or ``NotFound`` raised while fetching or converting the
    table is logged at debug level and yields an empty list.
    """
    try:
        bq_table = self.connections.get_bq_table(
            database=relation.database,
            schema=relation.schema,
            identifier=relation.table_name,
            conn_name=model_name,
        )
        # kept inside the try: conversion errors are swallowed as well
        return self._get_dbt_columns_from_bq_table(bq_table)
    except (ValueError, google.cloud.exceptions.NotFound) as exc:
        logger.debug("get_columns_in_relation error: {}".format(exc))
        return []
def rmdir(path):
    """Recursively delete the directory at ``path``.

    On Windows an error handler is installed that retries with different
    permissions; without it, removing directories (eg. cloned via git) can
    make rmtree throw a PermissionError.
    """
    logger.debug("DEBUG** Window rmdir sys.platform: {}".format(sys.platform))

    on_windows = sys.platform == 'win32'
    onerror = _windows_rmdir_readonly if on_windows else None

    return shutil.rmtree(path, onerror=onerror)
def run_cmd(cwd, cmd, env=None):
    """Run ``cmd`` (a list of arguments) in ``cwd`` and return ``(out, err)``.

    Raises ``CommandError`` for an empty command and ``CommandResultError``
    for a non-zero exit code. An ``OSError`` from launching the process is
    handed to ``_interpret_oserror``.
    """
    logger.debug('Executing "{}"'.format(' '.join(cmd)))
    if len(cmd) == 0:
        raise dbt.exceptions.CommandError(cwd, cmd)

    # the env argument replaces the environment entirely, which has exciting
    # consequences on Windows! Layer it over a copy of os.environ instead.
    if env is None:
        full_env = None
    else:
        full_env = os.environ.copy()
        full_env.update(env)

    try:
        proc = subprocess.Popen(
            cmd,
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=full_env)
        out, err = proc.communicate()
    except OSError as exc:
        _interpret_oserror(exc, cwd, cmd)

    logger.debug('STDOUT: "{}"'.format(out))
    logger.debug('STDERR: "{}"'.format(err))

    returncode = proc.returncode
    if returncode != 0:
        logger.debug('command return code={}'.format(returncode))
        raise dbt.exceptions.CommandResultError(cwd, cmd, returncode,
                                                out, err)

    return out, err
def parse_cli_vars(var_string):
    """Parse the ``--vars`` argument and return it as a dict.

    Raises a compiler error when the YAML is valid but its top level is not
    exactly a ``dict``. A ``ValidationException`` from the YAML loader is
    logged as an error and re-raised.
    """
    try:
        parsed = yaml_helper.load_yaml_text(var_string)
        parsed_type = type(parsed)
        if parsed_type == dict:
            return parsed
        dbt.exceptions.raise_compiler_error(
            "The --vars argument must be a YAML dictionary, but was "
            "of type '{}'".format(parsed_type.__name__))
    except dbt.exceptions.ValidationException:
        logger.error(
            "The YAML provided in the --vars argument is not valid.\n")
        raise
def raw_execute(self, sql, name=None, fetch=False):
    """Run ``sql`` as a standard-SQL BigQuery job on connection ``name``.

    Returns ``(query_job, iterator)`` where ``iterator`` is the job's
    result. ``fetch`` is accepted but not used in this method.
    """
    conn = self.get(name)

    logger.debug('On %s: %s', name, sql)

    job_config = google.cloud.bigquery.QueryJobConfig()
    job_config.use_legacy_sql = False
    query_job = conn.handle.query(sql, job_config)

    # result() blocks until the query has completed
    with self.exception_handler(sql, conn.name):
        iterator = query_job.result()

    return query_job, iterator
def _safe_release_connection(self):
    """Release this node's connection without ever raising.

    Returns None on success. If releasing raises, the error and traceback
    are logged at debug level and the error is returned as a string.
    """
    node_name = self.node.name
    try:
        self.adapter.release_connection(node_name)
    except Exception as exc:
        logger.debug(
            'Error releasing connection for node {}: {!s}\n{}'
            .format(node_name, exc, traceback.format_exc())
        )
        return dbt.compat.to_string(exc)
    else:
        return None
def __deepcopy__(self, memo):
    """Fail with a compiler error instead of copying this object.

    Logs which undefined variable was hit and in which node, then raises
    through ``raise_compiler_error`` with a jinja-style message.
    """
    source_path = os.path.join(self.node.get('root_path'),
                               self.node.get('original_file_path'))

    details = (
        'dbt encountered an undefined variable, "{}" in node {}.{} '
        '(source path: {})'
    ).format(
        self.name,
        self.node.get('package_name'),
        self.node.get('name'),
        source_path,
    )
    logger.debug(details)

    # match jinja's message
    dbt.exceptions.raise_compiler_error(
        "{!r} is undefined".format(self.name),
        node=self.node
    )
def run_dbt(self, args=None, expect_pass=True, strict=True):
    """Invoke dbt with ``args`` and assert on whether it succeeded.

    ``args`` defaults to ``["run"]``. ``--strict`` is prepended when
    ``strict`` is true and ``--log-cache-events`` is always appended. The
    caller's ``args`` sequence is never modified.

    Returns the first element of ``dbt.handle_and_check``'s result; fails
    the test when the success flag differs from ``expect_pass``.
    """
    # Work on a copy: with strict=False the original code appended
    # '--log-cache-events' directly to the list the caller passed in.
    args = ["run"] if args is None else list(args)

    if strict:
        args = ["--strict"] + args
    args.append('--log-cache-events')

    logger.info("Invoking dbt with {}".format(args))

    res, success = dbt.handle_and_check(args)
    self.assertEqual(
        success, expect_pass,
        "dbt exit state did not match expected")

    return res
def get_credentials(cls, credentials):
    """Return the credentials to connect with, based on ``credentials.method``.

    'database' (or a missing method, kept for backwards compatibility)
    returns the credentials unchanged; 'iam' returns temporary IAM cluster
    credentials; anything else raises ``FailedToConnectException``.
    """
    method = credentials.method

    if method == 'iam':
        logger.debug("Connecting to Redshift using 'IAM' credentials")
        return cls.get_tmp_iam_cluster_credentials(credentials)

    if method is None or method == 'database':
        logger.debug("Connecting to Redshift using 'database' credentials")
        return credentials

    raise dbt.exceptions.FailedToConnectException(
        "Invalid 'method' in profile: '{}'".format(method))
def print_run_status_line(results):
    """Log a one-line PASS/ERROR/SKIP/TOTAL tally for ``results``.

    Each result is classified by ``interpret_run_result``, which must return
    one of the tally keys.
    """
    tally = dict.fromkeys(('error', 'skip', 'pass', 'total'), 0)

    for result in results:
        outcome = interpret_run_result(result)
        tally[outcome] += 1
        tally['total'] += 1

    template = "\nDone. PASS={pass} ERROR={error} SKIP={skip} TOTAL={total}"
    logger.info(template.format(**tally))
def diag(self):
    """Log the args, project and profile that dbt resolved.

    If loading the project or the profile raises ``DbtConfigError``, an
    'ERROR loading ...' string is logged in its place.
    """
    # if we got here, a 'dbt_project.yml' does exist, but we have not tried
    # to parse it.
    profile_name = None
    try:
        project = dbt.config.Project.from_current_directory()
        profile_name = project.profile_name
    except dbt.config.DbtConfigError as exc:
        project = 'ERROR loading project: {!s}'.format(exc)

    # log the profile we decided on as well, if it's available.
    try:
        profile = dbt.config.Profile.from_args(self.args, profile_name)
    except dbt.config.DbtConfigError as exc:
        profile = 'ERROR loading profile: {!s}'.format(exc)

    logger.info("args: {}".format(self.args))
    logger.info("")
    logger.info("project:\n{!s}".format(project))
    logger.info("")
    logger.info("profile:\n{!s}".format(profile))
def print_compile_stats(stats):
    """Log a "Found ..." line with a pluralized count per resource type.

    Every known type is listed (zero when absent from ``stats``); entries in
    ``stats`` for types without a display name are left out.
    """
    names = {
        NodeType.Model: 'model',
        NodeType.Test: 'test',
        NodeType.Snapshot: 'snapshot',
        NodeType.Analysis: 'analysis',
        NodeType.Macro: 'macro',
        NodeType.Operation: 'operation',
        NodeType.Seed: 'seed file',
        NodeType.Source: 'source',
    }

    counts = dict.fromkeys(names, 0)
    counts.update(stats)

    parts = []
    for node_type, count in counts.items():
        if node_type in names:
            parts.append(dbt.utils.pluralize(count, names[node_type]))

    logger.info("Found {}".format(", ".join(parts)))
def run(self):
    """Run the selected tests and log the run status line.

    When both or neither of ``--data`` / ``--schema`` are set, an empty tag
    set is passed to the runner; otherwise only the chosen kind is passed.
    Returns the runner's results.
    """
    runner = RunManager(self.project, self.project['target-path'],
                        self.args)

    include = self.args.models
    exclude = self.args.exclude

    requested = [self.args.data, self.args.schema]
    if all(requested) or not any(requested):
        tags = set()
    elif self.args.data:
        tags = {'data'}
    elif self.args.schema:
        tags = {'schema'}
    else:
        raise RuntimeError("unexpected")

    results = runner.run_tests(include, exclude, tags)

    logger.info(dbt.utils.get_run_status_line(results))

    return results
def _iterate_selected_nodes(self):
    """Yield the manifest entry for every selected unique id, in sorted order.

    Ids are looked up in the manifest's nodes, then sources, then exposures.
    Logs a warning and yields nothing when the selection is empty.

    Raises ``InternalException`` when the manifest is None and
    ``RuntimeException`` when a selected id is in none of the three
    collections.
    """
    selector = self.get_node_selector()
    spec = self.get_selection_spec()
    nodes = sorted(selector.get_selected(spec))
    if not nodes:
        logger.warning('No nodes selected!')
        return

    if self.manifest is None:
        raise InternalException(
            'manifest is None in _iterate_selected_nodes')

    for node in nodes:
        if node in self.manifest.nodes:
            yield self.manifest.nodes[node]
        elif node in self.manifest.sources:
            yield self.manifest.sources[node]
        elif node in self.manifest.exposures:
            yield self.manifest.exposures[node]
        else:
            # The two literals used to be joined without a separator and
            # did not mention exposures, which are accepted above.
            raise RuntimeException(
                f'Got an unexpected result from node selection: "{node}". '
                f'Expected a node, a source or an exposure!')
def get_columns_in_table(self, schema_name, table_name, database=None,
                         model_name=None):
    """Return the dbt columns of ``schema_name.table_name`` in BigQuery.

    BigQuery does not have databases -- ``database`` only exists for
    consistency with the base implementation. A ``ValueError`` or
    ``NotFound`` is logged at debug level and yields an empty list.
    """
    client = self.get_connection(model_name).handle

    try:
        table_ref = client.dataset(schema_name).table(table_name)
        bq_table = client.get_table(table_ref)
        return self.get_dbt_columns_from_bq_table(bq_table)
    except (ValueError, google.cloud.exceptions.NotFound) as exc:
        logger.debug("get_columns_in_table error: {}".format(exc))
        return []
def invalid_source_fail_unless_test(
    node, target_name, target_table_name, disabled
):
    """Report a missing or disabled source, more leniently for tests.

    Non-test nodes go through ``source_target_not_found``. For test nodes
    the message is only logged at debug level when the source is disabled,
    and passed to ``warn_or_error`` otherwise.
    """
    if node.resource_type != NodeType.Test:
        source_target_not_found(
            node,
            target_name,
            target_table_name,
            disabled=disabled
        )
        return

    msg = get_source_not_found_or_disabled_msg(
        node, target_name, target_table_name, disabled
    )
    if disabled:
        logger.debug(warning_tag(msg))
    else:
        warn_or_error(msg, log_fmt=warning_tag('{}'))
def _checkout(self, project):
    """Clone the repository into the downloads directory and check out the
    pinned version.

    Raises a dependency error unless exactly one version is pinned. If the
    underlying command fails with an ``ExecutableError`` for ``git``, a hint
    about installing git is logged before the error is re-raised.

    Returns the path to the checked out directory.
    """
    if len(self.version) != 1:
        dbt.exceptions.raise_dependency_error(
            'Cannot checkout repository until the version is pinned.')

    try:
        checkout_dir = dbt.clients.git.clone_and_checkout(
            self.git, DOWNLOADS_PATH, branch=self.version[0],
            dirname=self._checkout_name)
    except dbt.exceptions.ExecutableError as exc:
        git_failed = exc.cmd and exc.cmd[0] == 'git'
        if git_failed:
            logger.error(
                'Make sure git is installed on your machine. More '
                'information: '
                'https://docs.getdbt.com/docs/package-management'
            )
        raise

    return os.path.join(DOWNLOADS_PATH, checkout_dir)
def get_credentials(cls, credentials):
    """Return the credentials to connect with, based on ``credentials.method``.

    'database' (or a missing method, kept for backwards compatibility)
    requires a password and returns the credentials unchanged; 'iam' returns
    temporary IAM cluster credentials. A missing password or an unknown
    method raises ``FailedToConnectException``.
    """
    method = credentials.method

    if method == 'iam':
        logger.debug("Connecting to Redshift using 'IAM' credentials")
        return cls.get_tmp_iam_cluster_credentials(credentials)

    if method is not None and method != 'database':
        raise dbt.exceptions.FailedToConnectException(
            "Invalid 'method' in profile: '{}'".format(method))

    logger.debug("Connecting to Redshift using 'database' credentials")
    # this requirement is really annoying to encode into json schema,
    # so it is validated here instead
    if credentials.password is None:
        raise dbt.exceptions.FailedToConnectException(
            "'password' field is required for 'database' credentials"
        )
    return credentials
def parse_file(self, block: FileBlock):
    """Parse the macros in ``block.file`` and add them to the results."""
    source_file = block.file

    # mark the file as seen, even if there are no macros in it
    self.results.get_file(source_file)

    file_path = source_file.path.original_file_path
    logger.debug("Parsing {}".format(file_path))

    # this is really only used for error messages
    base_node = UnparsedMacro(
        path=file_path,
        original_file_path=file_path,
        package_name=self.project.project_name,
        raw_sql=source_file.contents,
        root_path=self.project.project_root,
        resource_type=NodeType.Macro,
    )

    for macro in self.parse_unparsed_macros(base_node):
        self.results.add_macro(block.file, macro)
def run(self):
    """Set up the profiles directory and clone the starter project.

    Creates the profiles directory and profiles.yml, then clones the starter
    repo into the requested project directory and logs the addendum. Raises
    ``RuntimeError`` if the project directory already exists.
    """
    project_dir = self.args.project_name
    profiles_dir = dbt.config.PROFILES_DIR

    self.create_profiles_dir(profiles_dir)
    self.create_profiles_file(os.path.join(profiles_dir, 'profiles.yml'))

    logger.info(
        "Creating dbt configuration folder at {}".format(profiles_dir))

    if os.path.exists(project_dir):
        raise RuntimeError(
            "directory {} already exists!".format(project_dir))

    self.clone_starter_repo(project_dir)

    logger.info(self.get_addendum(project_dir, profiles_dir))
def run_dbt(self, args=None, expect_pass=True, strict=True,
            clear_adapters=True):
    """Invoke dbt with ``args`` and assert on whether it succeeded.

    The adapter cache is reset first unless ``clear_adapters`` is false.
    ``args`` defaults to ``["run"]``. ``--strict`` is prepended when
    ``strict`` is true and ``--log-cache-events`` is always appended. The
    caller's ``args`` sequence is never modified.

    Returns the first element of ``dbt.handle_and_check``'s result; fails
    the test when the success flag differs from ``expect_pass``.
    """
    # clear the adapter cache
    if clear_adapters:
        reset_adapters()

    # Work on a copy: with strict=False the original code appended
    # '--log-cache-events' directly to the list the caller passed in.
    args = ["run"] if args is None else list(args)

    if strict:
        args = ["--strict"] + args
    args.append('--log-cache-events')

    logger.info("Invoking dbt with {}".format(args))

    res, success = dbt.handle_and_check(args)
    self.assertEqual(success, expect_pass,
                     "dbt exit state did not match expected")

    return res
def tearDown(self):
    """Drop the test schemas, close connections and remove the test root.

    Failure to remove the test root directory is logged rather than raised.
    """
    # get any current run adapter and clean up its connections before we
    # reset them. It'll probably be different from ours because
    # handle_and_check() calls reset_adapters().
    register_adapter(self.config)
    adapter = get_adapter(self.config)

    # The hasattr guard has to come first: the original read self.adapter
    # before checking for it, so a test without one failed with
    # AttributeError before the guard could help.
    if not hasattr(self, 'adapter'):
        self.adapter = adapter
    elif adapter is not self.adapter:
        adapter.cleanup_connections()

    self._drop_schemas()

    self.adapter.cleanup_connections()
    reset_adapters()
    os.chdir(INITIAL_ROOT)
    try:
        shutil.rmtree(self.test_root_dir)
    except EnvironmentError:
        logger.exception('Could not clean up after test - {} not removable'
                         .format(self.test_root_dir))
def open(cls, connection):
    """Open a hive connection for ``connection`` unless it is already open.

    On success the connection is marked 'open' and its handle is set to a
    ``ConnectionWrapper`` around the hive connection. Returns ``connection``.
    """
    if connection.state == 'open':
        logger.debug('Connection is already open, skipping open.')
        return connection

    creds = connection.credentials

    # Authentication is always LDAP here. The original kept disabled code
    # for kerberos auth and for passing a schema to connect().
    hive_conn = hive.connect(
        host=creds.host,
        port=creds.get('port', 10000),
        username=creds.get('username', ''),
        password=creds.get('pass', ''),
        auth='LDAP',
    )

    connection.state = 'open'
    connection.handle = ConnectionWrapper(hive_conn)
    return connection
def exception_handler(cls, profile, sql, model_name=None,
                      connection_name=None):
    """Roll back and translate errors raised by the wrapped block.

    Generator with a single ``yield`` -- NOTE(review): presumably wrapped by
    a context-manager decorator outside this view; confirm.

    A ``psycopg2.ProgrammingError`` about relation ownership or denied
    permission becomes a ``RuntimeError`` with a friendlier message; every
    other exception is re-raised after the rollback.
    """
    connection = cls.get_connection(profile, connection_name)
    schema = connection.get('credentials', {}).get('schema')

    try:
        yield
    except psycopg2.ProgrammingError as e:
        logger.debug('Postgres error: {}'.format(str(e)))

        cls.rollback(connection)

        error_data = {
            "model": model_name,
            "schema": schema,
            "user": connection.get('credentials', {}).get('user'),
        }
        primary = e.diag.message_primary

        if 'must be owner of relation' in primary:
            raise RuntimeError(
                RELATION_NOT_OWNER_MESSAGE.format(**error_data))
        if "permission denied for" in primary:
            raise RuntimeError(
                RELATION_PERMISSION_DENIED_MESSAGE.format(**error_data))
        raise e
    except Exception as e:
        logger.debug("Error running SQL: %s", sql)
        logger.debug("Rolling back transaction.")
        cls.rollback(connection)
        raise e
def dependency_projects(project):
    """Yield a loaded project for every dependency directory.

    Looks in the global dbt modules path and in the project's own modules
    path. A directory whose dbt_project.yml fails with ``DbtProjectError``
    is logged and skipped.
    """
    search_roots = [
        GLOBAL_DBT_MODULES_PATH,
        os.path.join(project['project-root'], project['modules-path']),
    ]

    for root in search_roots:
        logger.debug("Loading dependency project from {}".format(root))

        for entry in os.listdir(root):
            candidate = os.path.join(root, entry)

            # exclude non-dirs and dirs that start with __
            # the latter could be something like __pycache__
            # for the global dbt modules dir
            if entry.startswith('__') or not os.path.isdir(candidate):
                continue

            try:
                yield dbt.project.read_project(
                    os.path.join(candidate, 'dbt_project.yml'),
                    project.profiles_dir,
                    profile_to_load=project.profile_to_load,
                    args=project.args)
            except dbt.project.DbtProjectError as exc:
                logger.info(
                    "Error reading dependency project at {}".format(
                        candidate))
                logger.info(str(exc))
def add_query(self, sql: str, auto_begin: bool = True,
              bindings: Optional[Any] = None,
              abridge_sql_log: bool = False) -> Tuple[Connection, Any]:
    """Execute ``sql`` on the current thread's connection.

    A transaction is begun first when ``auto_begin`` is set and none is
    open. ``bindings`` are only passed to the driver when truthy. With
    ``abridge_sql_log`` the logged statement is cut to its first 512
    characters followed by '...'; the executed statement is never changed.

    Returns ``(connection, cursor)``.
    """
    connection = self.get_thread_connection()
    if auto_begin and connection.transaction_open is False:
        self.begin()

    logger.debug('Using {} connection "{}".'.format(
        self.TYPE, connection.name))

    with self.exception_handler(sql):
        if abridge_sql_log:
            log_sql = '{}...'.format(sql[:512])
        else:
            log_sql = sql

        # Log the (possibly abridged) statement. The original logged `sql`
        # here, which made abridge_sql_log a no-op.
        logger.debug(f'On {connection.name}: {log_sql}')
        pre = time.time()

        cursor = connection.handle.cursor()

        # Driver will fail if bindings are passed to function and not needed
        if bindings:
            cursor.execute(sql, bindings)
        else:
            cursor.execute(sql)

        logger.debug(
            f"SQL status: {self.get_response(cursor)} in "
            f"{time.time() - pre:0.2f} seconds"
        )

        return connection, cursor
def open(cls, connection):
    """Open a BigQuery client for ``connection`` unless it is already open.

    When default credentials are missing, the user is asked to log into GCP
    and the client is created again. Any other failure marks the connection
    as failed and raises ``FailedToConnectException``.

    Returns ``connection`` with its handle and state set.
    """
    if connection.state == 'open':
        logger.debug('Connection is already open, skipping open.')
        return connection

    credentials = connection.credentials
    try:
        client = cls.get_bigquery_client(credentials)
    except google.auth.exceptions.DefaultCredentialsError:
        logger.info("Please log into GCP to continue")
        gcloud.setup_default_credentials()

        # errors from this second attempt are not handled below
        client = cls.get_bigquery_client(connection.credentials)
    except Exception as exc:
        logger.debug("Got an error when attempting to create a bigquery "
                     "client: '{}'".format(exc))

        connection.handle = None
        connection.state = 'fail'

        raise FailedToConnectException(str(exc))

    connection.handle = client
    connection.state = 'open'
    return connection
def run(self):
    """Serve the generated docs over HTTP until interrupted.

    Changes into the target path, copies the docs index file there as
    index.html, optionally opens a browser tab and then blocks in
    ``serve_forever``. The server is shut down and closed on the way out.
    """
    os.chdir(self.config.target_path)

    port = self.args.port

    shutil.copyfile(DOCS_INDEX_FILE_PATH, 'index.html')

    logger.info("Serving docs at 0.0.0.0:{}".format(port))
    logger.info(
        "To access from your browser, navigate to: http://localhost:{}"
        .format(port)
    )
    logger.info("Press Ctrl+C to exit.\n\n")

    # mypy doesn't think SimpleHTTPRequestHandler is ok here, but it is
    bind_address = ('0.0.0.0', port)
    httpd = TCPServer(  # type: ignore
        bind_address,
        SimpleHTTPRequestHandler  # type: ignore
    )  # type: ignore

    if self.args.open_browser:
        try:
            webbrowser.open_new_tab(f'http://127.0.0.1:{port}')
        except webbrowser.Error:
            # failing to open a browser is not fatal; keep serving
            pass

    try:
        httpd.serve_forever()  # blocks
    finally:
        httpd.shutdown()
        httpd.server_close()

    return None
def exception_handler(self, sql):
    """Release the connection and translate errors from the wrapped block.

    Generator with a single ``yield`` -- NOTE(review): presumably wrapped by
    a context-manager decorator outside this view; confirm.

    ``cx_Oracle.DatabaseError`` becomes ``DatabaseException``. dbt's own
    ``RuntimeException`` is re-raised untouched; any other exception is
    wrapped in a ``RuntimeException``.
    """
    try:
        yield
    except cx_Oracle.DatabaseError as e:
        logger.info('Oracle error: {}'.format(str(e)))

        try:
            # attempt to release the connection
            self.release()
        except cx_Oracle.Error:
            logger.info("Failed to release connection!")

        raise dbt.exceptions.DatabaseException(str(e).strip()) from e
    except Exception as e:
        logger.info("Rolling back transaction.")
        self.release()

        if not isinstance(e, dbt.exceptions.RuntimeException):
            raise dbt.exceptions.RuntimeException(e) from e

        # during a sql query, an internal to dbt exception was raised.
        # this sounds a lot like a signal handler and probably has
        # useful information, so raise it without modification.
        raise e
def add_query(self, sql: str, auto_begin: bool = True,
              bindings: Optional[Any] = {},
              abridge_sql_log: bool = False) -> Tuple[Connection, Any]:
    """Execute ``sql`` with ``bindings`` and commit on the connection handle.

    A transaction is begun first when ``auto_begin`` is set and none is
    open. With ``abridge_sql_log`` only the first 512 characters of the
    statement (plus '...') are logged. Returns ``(connection, cursor)``.
    """
    # NOTE(review): the `{}` default is one dict shared across calls. This
    # method only passes it through to cursor.execute; it is kept as-is so
    # callers see the same default.
    connection = self.get_thread_connection()
    needs_begin = auto_begin and connection.transaction_open is False
    if needs_begin:
        self.begin()

    logger.debug('Using {} connection "{}".'.format(
        self.TYPE, connection.name))

    with self.exception_handler(sql):
        log_sql = '{}...'.format(sql[:512]) if abridge_sql_log else sql

        logger.debug(
            'On {connection_name}: {sql}',
            connection_name=connection.name,
            sql=log_sql,
        )
        started = time.time()

        cursor = connection.handle.cursor()
        cursor.execute(sql, bindings)
        connection.handle.commit()

        logger.debug("SQL status: {status} in {elapsed:0.2f} seconds",
                     status=self.get_status(cursor),
                     elapsed=(time.time() - started))

        return connection, cursor
def run_from_graph(self, Selector, Runner, query):
    """Compile the project, select nodes with ``query`` and run them.

    Returns an empty list when nothing is selected, otherwise the result of
    ``execute_nodes``. The adapter's connections are cleaned up once the run
    was attempted, whether or not it succeeded.
    """
    flat_graph, linker = self.compile(self.project)

    selector = Selector(linker, flat_graph)
    dep_list = selector.as_node_list(selector.select(query))

    adapter = get_adapter(self.project.run_environment())

    flat_nodes = dbt.utils.flatten_nodes(dep_list)
    if len(flat_nodes) == 0:
        logger.info("WARNING: Nothing to do. Try checking your model "
                    "configs and model specification args")
        return []

    logger.info("")
    if Runner.print_header:
        stat_line = dbt.ui.printer.get_counts(flat_nodes)
        dbt.ui.printer.print_timestamped_line(stat_line)
        dbt.ui.printer.print_timestamped_line("")

    try:
        Runner.before_run(self.project, adapter, flat_graph)
        started = time.time()
        res = self.execute_nodes(linker, Runner, flat_graph, dep_list)
        elapsed = time.time() - started
        Runner.after_run(self.project, adapter, res, flat_graph, elapsed)
        return res
    finally:
        adapter.cleanup_connections()
def run_forever(self):
    """Log the server details and serve the RPC application.

    The server runs threaded unless the task manager reports being
    single-threaded.
    """
    host = self.args.host
    port = self.args.port

    # '0.0.0.0' is not useful in a URL, so show localhost instead
    display_host = 'localhost' if host == '0.0.0.0' else host

    logger.info('Serving RPC server at {}:{}, pid={}'.format(
        host, port, os.getpid()))
    logger.info('Supported methods: {}'.format(
        sorted(self.task_manager.methods())))
    logger.info('Send requests to http://{}:{}/jsonrpc'.format(
        display_host, port))

    routes = {'/jsonrpc': self.handle_jsonrpc_request}
    app = DispatcherMiddleware(self.handle_request, routes)

    # we have to run in threaded mode if we want to share subprocess
    # handles, which is the easiest way to implement `kill` (it makes
    # `ps` easier as well). The alternative involves tracking
    # metadata+state in a multiprocessing.Manager, adds polling the
    # manager to the request task handler and in general gets messy
    # fast.
    run_simple(
        host,
        port,
        app,
        threaded=not self.task_manager.single_threaded(),
    )
def run_from_args(parsed):
    """Build the task selected by ``parsed``, run it, and return
    ``(task, results)``.

    Also applies flags, runs the task class's pre-init hook and points the
    file logger at the task config's ``log_path`` (None when there is no
    config or no such attribute).
    """
    log_cache_events(getattr(parsed, 'log_cache_events', False))
    flags.set_from_args(parsed)

    task_cls = parsed.cls
    task_cls.pre_init_hook(parsed)
    # we can now use the logger for stdout

    logger.info("Running with dbt{}".format(dbt.version.installed))

    # this will convert DbtConfigErrors into RuntimeExceptions
    task = task_cls.from_args(args=parsed)
    logger.debug("running dbt with arguments {parsed}", parsed=str(parsed))

    if task.config is None:
        log_path = None
    else:
        log_path = getattr(task.config, 'log_path', None)
    # we can finally set the file logger up
    log_manager.set_path(log_path)

    active_user = dbt.tracking.active_user
    if active_user is not None:  # mypy appeasement, always true
        logger.debug("Tracking: {}".format(active_user.state()))

    results = None
    with track_run(task):
        results = task.run()

    return task, results
def run(self):
    """Resolve and install every package listed for the project.

    Dependencies are resolved breadth-first: each round incorporates the
    pending packages, resolves their versions and collects the packages they
    depend on for the next round. Every resolved package is then installed
    and its installation tracked.
    """
    dbt.clients.system.make_directory(self.project['modules-path'])
    dbt.clients.system.make_directory(DOWNLOADS_PATH)

    packages = _read_packages(self.project)
    if not packages:
        logger.info('Warning: No packages were found in packages.yml')
        return

    final_deps = PackageListing.create([])
    pending_deps = PackageListing.create(packages)

    while pending_deps:
        next_round = PackageListing.create([])
        for name, package in pending_deps.items():
            final_deps.incorporate(package)
            final_deps[name].resolve_version()
            metadata = final_deps[name].fetch_metadata(self.project)
            next_round.incorporate_from_yaml(_read_packages(metadata))
        pending_deps = next_round

    self._check_for_duplicate_project_names(final_deps)

    for _, package in final_deps.items():
        logger.info('Installing %s', package)
        package.install(self.project)
        logger.info(' Installed from %s\n', package.nice_version_name())

        self.track_package_install(
            package_name=package.name,
            source_type=package.source_type(),
            version=package.version_name())
def run(self):
    """Remove every configured clean target that is not protected.

    Each path in ``self.config.clean_targets`` is checked; protected paths
    are reported and left alone, all others are removed recursively with
    removal errors ignored.
    """
    for target in self.config.clean_targets:
        logger.info("Checking {}/*".format(target))

        if self.__is_protected_path(target):
            logger.info("ERROR: not cleaning {}/* because it is "
                        "protected".format(target))
            continue

        # ignore_errors=True: a missing or partially removable directory
        # must not abort the clean
        shutil.rmtree(target, ignore_errors=True)
        logger.info(" Cleaned {}/*".format(target))

    logger.info("Finished cleaning all paths.")
def try_create_schema(self):
    """Create the profile's default schema unless it already exists.

    The existence check and the creation each run between their own
    ``adapter.begin`` / ``adapter.commit`` pair.

    :raises dbt.exceptions.FailedToConnectException: re-raised, after
        logging a hint, when creating the schema fails to connect.
    :raises psycopg2.OperationalError: re-raised the same way.
    """
    profile = self.project.run_environment()
    adapter = get_adapter(profile)

    schema_name = adapter.get_default_schema(profile)

    connection = adapter.begin(profile)
    schema_exists = adapter.check_schema_exists(profile, schema_name)
    adapter.commit(connection)

    if schema_exists:
        logger.debug('schema {} already exists -- '
                     'not creating'.format(schema_name))
        return

    try:
        connection = adapter.begin(profile)
        adapter.create_schema(profile, schema_name)
        adapter.commit(connection)
    except (dbt.exceptions.FailedToConnectException,
            psycopg2.OperationalError) as e:
        # Tell the user what to try next, then let the original error
        # propagate unchanged.
        logger.info("ERROR: Could not connect to the target database. Try "
                    "`dbt debug` for more information.")
        logger.info(str(e))
        raise
def run(self):
    """Resolve and install every package declared in the configuration.

    Packages are resolved one level at a time: each pending package is
    merged into the final listing, its version is resolved, and the
    packages declared by its fetched config become the next level. Once
    nothing is pending, every package in the final listing is installed
    and tracked. The downloads directory is removed afterwards when
    ``REMOVE_DOWNLOADS`` is set.
    """
    dbt.clients.system.make_directory(self.config.modules_path)
    _initialize_downloads()

    declared = self.config.packages.packages
    if not declared:
        logger.info('Warning: No packages were found in packages.yml')
        return

    final_deps = PackageListing.create([])
    pending_deps = PackageListing.create(declared)

    while pending_deps:
        next_level = PackageListing.create([])
        for name, package in pending_deps.items():
            final_deps.incorporate(package)
            final_deps[name].resolve_version()
            package_config = final_deps[name].fetch_metadata(self.config)
            next_level.incorporate_from_yaml(package_config.packages)
        pending_deps = next_level

    self._check_for_duplicate_project_names(final_deps)

    for _, package in final_deps.items():
        logger.info('Installing %s', package)
        package.install(self.config)
        logger.info(' Installed from %s\n', package.nice_version_name())
        self.track_package_install(
            package_name=package.name,
            source_type=package.source_type(),
            version=package.version_name(),
        )

    if REMOVE_DOWNLOADS:
        dbt.clients.system.rmtree(DOWNLOADS_PATH)
def run(self):
    """Serve the target directory over HTTP until interrupted.

    Changes the working directory to the configured target path, copies
    the docs index file there as ``index.html``, tries to open a browser
    tab pointing at the server, and then blocks in ``serve_forever``.
    The server is shut down and closed when serving stops for any reason.
    """
    os.chdir(self.config.target_path)
    port = self.args.port

    shutil.copyfile(DOCS_INDEX_FILE_PATH, 'index.html')

    logger.info("Serving docs at 0.0.0.0:{}".format(port))
    browse_hint = (
        "To access from your browser, navigate to http://localhost:{}."
    )
    logger.info(browse_hint.format(port))
    logger.info("Press Ctrl+C to exit.\n\n")

    server_address = ('0.0.0.0', port)
    httpd = TCPServer(server_address, SimpleHTTPRequestHandler)

    # Opening a browser is a convenience only; a failure here must not
    # prevent the server from starting.
    try:
        webbrowser.open_new_tab('http://127.0.0.1:{}'.format(port))
    except webbrowser.Error:
        pass

    try:
        # blocks until serving stops
        httpd.serve_forever()
    finally:
        httpd.shutdown()
        httpd.server_close()

    return None
def open_connection(cls, connection):
    """Return a copy of ``connection`` with an open BigQuery client handle.

    If the connection is already marked open it is returned unchanged.
    Otherwise a client is built from the connection's credentials. When
    default credentials are missing, the gcloud default-credentials setup
    is run and client creation is retried once.

    :param connection: mapping with optional ``state`` and ``credentials``
        keys.
    :return: the original mapping if already open, otherwise a copy with
        ``handle`` set to the client and ``state`` set to ``'open'``.
    :raises dbt.exceptions.FailedToConnectException: when creating the
        client fails for any reason other than missing default
        credentials.
    """
    if connection.get('state') == 'open':
        logger.debug('Connection is already open, skipping open.')
        return connection

    result = connection.copy()
    credentials = connection.get('credentials', {})

    try:
        handle = cls.get_bigquery_client(credentials)

    except google.auth.exceptions.DefaultCredentialsError:
        logger.info("Please log into GCP to continue")
        dbt.clients.gcloud.setup_default_credentials()

        # NOTE: a failure of this retry is not handled below; it
        # propagates to the caller as-is.
        handle = cls.get_bigquery_client(credentials)

    except Exception as e:
        # A stray bare `raise` used to sit at the top of this handler,
        # which made the logging, the state bookkeeping and the
        # FailedToConnectException wrap below unreachable.
        logger.debug("Got an error when attempting to create a bigquery "
                     "client: '{}'".format(e))

        result['handle'] = None
        result['state'] = 'fail'

        raise dbt.exceptions.FailedToConnectException(str(e)) from e

    result['handle'] = handle
    result['state'] = 'open'
    return result
def patch_nodes(
    self, patches: MutableMapping[str, ParsedNodePatch]
) -> None:
    """Apply each patch in ``patches`` to the node with the same name.

    The input mapping is consumed: every patch that matches a node is
    popped from it. Matching is done on the node's _name_, so this relies
    on node names being unique, not merely their unique_id values.

    A patch whose ``yaml_key`` differs from the pluralized resource type
    of its node triggers a deprecation warning when the key is 'models',
    and is reported through ``raise_invalid_patch`` otherwise.
    """
    # There is no name -> node mapping, and a patch only carries the node
    # name, so walk all nodes once and pop the matching patch for each.
    # A NameSearcher per patch would instead cost O(n*m): one scan of the
    # nodes for every patch.
    for node in self.nodes.values():
        patch = patches.pop(node.name, None)
        if not patch:
            continue

        expected_key = node.resource_type.pluralize()
        if expected_key != patch.yaml_key:
            if patch.yaml_key != 'models':
                raise_invalid_patch(
                    node, patch.yaml_key, patch.original_file_path
                )
            else:
                deprecations.warn(
                    'models-key-mismatch',
                    patch=patch, node=node, expected_key=expected_key
                )

        node.patch(patch)

    # Whatever is left matched no node. Patches aren't nodes, so the
    # existing target_not_found warning can't be used; log a debug-level
    # warning instead.
    unmatched_msg = (
        'WARNING: Found documentation for resource "{}" which was '
        'not found or is disabled'
    )
    for leftover in patches.values():
        logger.debug(unmatched_msg.format(leftover.name))