def hello_cereal():
    """Download the cereal dataset and parse it into a list of row dicts.

    Returns:
        list[dict]: One dict per CSV row, keyed by column name; values are
        strings, as produced by ``csv.DictReader``.
    """
    response = requests.get("https://docs.dagster.io/assets/cereal.csv")
    lines = response.text.split("\n")
    # list(...) rather than a copy-comprehension (ruff PERF402).
    cereals = list(csv.DictReader(lines))
    get_dagster_logger().info(f"Found {len(cereals)} cereals")
    return cereals
def hello_cereal() -> List[dict]:
    """Example of a Dagster op that retrieves data from HTTP source."""
    csv_text = requests.get("https://docs.dagster.io/assets/cereal.csv").text
    parsed_rows = list(csv.DictReader(csv_text.split("\n")))
    get_dagster_logger().info(f"Found {len(parsed_rows)} cereals")
    return parsed_rows
def hello_cereal(context):
    """Download the cereal dataset and log today's date plus the row count.

    Args:
        context: Dagster op context; reads ``context.op_config["date"]``.
    """
    response = requests.get("https://docs.dagster.io/assets/cereal.csv")
    lines = response.text.split("\n")
    # list(...) rather than a copy-comprehension (ruff PERF402).
    cereals = list(csv.DictReader(lines))
    date = context.op_config["date"]
    get_dagster_logger().info(
        f"Today is {date}. Found {len(cereals)} cereals.")
def find_highest_protein_cereal(cereals: List[dict]) -> str:
    """Example of a Dagster op that takes input and produces output.

    Args:
        cereals: Row dicts from the cereal CSV; ``"protein"`` holds a
            numeric string.

    Returns:
        The ``"name"`` of the cereal containing the most protein.
    """
    # Compare protein numerically: DictReader yields strings, and a
    # lexicographic sort ranks "9" above "10". max() also avoids sorting
    # the whole list just to pick one element.
    most_protein = max(cereals, key=lambda cereal: int(cereal["protein"]))
    get_dagster_logger().info(
        f'{most_protein["name"]} is the cereal that contains the most protein'
    )
    return most_protein["name"]
def define_multilevel_logging_pipeline(inside, python):
    """Build a two-solid test pipeline that emits one log record per stdlib level.

    Args:
        inside: if truthy, each solid constructs its logger inside its own body;
            otherwise both solids share a logger created here, in the enclosing
            closure.
        python: if truthy, use plain ``logging.getLogger``; otherwise use
            ``get_dagster_logger()``.

    Returns:
        The constructed ``@pipeline`` definition (``my_solid1`` feeds
        ``my_solid2``).
    """
    # Only needed for the shared-logger case; when `inside` is truthy the
    # solids never reference this name.
    if not inside:
        outside_logger = logging.getLogger("my_logger_outside") if python else get_dagster_logger()

    @solid
    def my_solid1():
        if inside:
            logger = logging.getLogger("my_logger_inside") if python else get_dagster_logger()
        else:
            logger = outside_logger
        # The first solid covers the low-severity levels...
        for level in [
            logging.DEBUG,
            logging.INFO,
        ]:
            logger.log(level, "foobar%s", "baz")

    @solid
    def my_solid2(_in):
        if inside:
            logger = logging.getLogger("my_logger_inside") if python else get_dagster_logger()
        else:
            logger = outside_logger
        # ...and the second covers the high-severity ones.
        for level in [
            logging.WARNING,
            logging.ERROR,
            logging.CRITICAL,
        ]:
            logger.log(level=level, msg="foobarbaz")

    @pipeline(mode_defs=[default_mode_def_for_test])
    def my_pipeline():
        my_solid2(my_solid1())

    return my_pipeline
def sort_by_calories(context, cereals):
    """Sort cereal rows by calories, write them to a per-run CSV file, then
    yield an AssetMaterialization for the file followed by an empty Output."""
    by_calories = sorted(cereals, key=lambda row: int(row["calories"]))
    log = get_dagster_logger()
    log.info(f"Least caloric cereal: {by_calories[0]['name']}")
    log.info(f"Most caloric cereal: {by_calories[-1]['name']}")

    columns = list(by_calories[0].keys())
    csv_path = os.path.abspath(
        f"output/calories_sorted_{context.run_id}.csv")
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    with open(csv_path, "w") as out_file:
        writer = csv.DictWriter(out_file, columns)
        writer.writeheader()
        writer.writerows(by_calories)

    yield AssetMaterialization(
        asset_key="sorted_cereals_csv",
        description="Cereals data frame sorted by caloric content",
        metadata={
            "sorted_cereals_csv_path": EventMetadata.path(csv_path)
        },
    )
    yield Output(None)
def sort_by_calories(cereals):
    """Log the least and most caloric cereal names.

    Args:
        cereals: Row dicts from the cereal CSV; ``"calories"`` holds a
            numeric string.
    """
    # Convert to int so the ordering is numeric, consistent with the other
    # sort_by_calories variants in this file: a plain string comparison
    # would rank "90" above "100".
    sorted_cereals = sorted(cereals, key=lambda cereal: int(cereal["calories"]))
    logger = get_dagster_logger()
    # f-strings instead of str.format, matching the rest of the file.
    logger.info(f'Least caloric cereal: {sorted_cereals[0]["name"]}')
    logger.info(f'Most caloric cereal: {sorted_cereals[-1]["name"]}')
def ambitious_op():
    """Attempt a division that always fails, log the error, and return None."""
    log = get_dagster_logger()
    try:
        return 1 / 0
    except ZeroDivisionError:
        log.error("Couldn't divide by zero!")
    return None
def my_solid1():
    """Emit one DEBUG and one INFO record through the configured logger."""
    # `inside`, `python`, and `outside_logger` come from the enclosing scope.
    if not inside:
        log = outside_logger
    elif python:
        log = logging.getLogger("my_logger_inside")
    else:
        log = get_dagster_logger()
    for level in (logging.DEBUG, logging.INFO):
        log.log(level, "foobar%s", "baz")
def __init__(
    self,
    logger: Optional[logging.Logger] = None,
):
    """Constructor

    Args:
        logger (Optional[logging.Logger]): A property for injecting a logger
            dependency. Default is ``None``, in which case
            ``get_dagster_logger()`` is used.
    """
    # Loggers are always truthy, so `or` only replaces an omitted/None value.
    self._logger = logger or get_dagster_logger()
def my_solid2(_in):
    """Emit WARNING, ERROR, and CRITICAL records through the configured logger."""
    # `inside`, `python`, and `outside_logger` come from the enclosing scope.
    if not inside:
        log = outside_logger
    elif python:
        log = logging.getLogger("my_logger_inside")
    else:
        log = get_dagster_logger()
    for level in (logging.WARNING, logging.ERROR, logging.CRITICAL):
        log.log(level=level, msg="foobarbaz")
def __init__(
    self,
    api_key: str,
    api_secret: str,
    disable_schedule_on_trigger: bool = True,
    request_max_retries: int = 3,
    request_retry_delay: float = 0.25,
    log: "Optional[logging.Logger]" = None,
):
    """Store client configuration and build the HTTP basic-auth credential.

    Args:
        api_key: API key for basic auth.
        api_secret: API secret for basic auth.
        disable_schedule_on_trigger: Whether to disable the schedule when
            a run is triggered.
        request_max_retries: Number of times a failed request is retried.
        request_retry_delay: Seconds to wait between retries.
        log: Logger to use; defaults to ``get_dagster_logger()``.
    """
    self._auth = HTTPBasicAuth(api_key, api_secret)
    self._disable_schedule_on_trigger = disable_schedule_on_trigger
    self._request_max_retries = request_max_retries
    self._request_retry_delay = request_retry_delay
    # Resolve the default logger at call time: a call in the parameter
    # default would execute once at import, before any run context exists.
    self._log = log if log is not None else get_dagster_logger()
def __init__(
    self,
    host: str,
    port: str,
    use_https: bool,
    request_max_retries: int = 3,
    request_retry_delay: float = 0.25,
    log: "Optional[logging.Logger]" = None,
):
    """Store connection settings for the client.

    Args:
        host: Hostname of the service.
        port: Port of the service.
        use_https: Whether to connect over HTTPS.
        request_max_retries: Number of times a failed request is retried.
        request_retry_delay: Seconds to wait between retries.
        log: Logger to use; defaults to ``get_dagster_logger()``.
    """
    self._host = host
    self._port = port
    self._use_https = use_https
    self._request_max_retries = request_max_retries
    self._request_retry_delay = request_retry_delay
    # Resolve the default logger at call time: a call in the parameter
    # default would execute once at import, before any run context exists.
    self._log = log if log is not None else get_dagster_logger()
def __init__(
    self,
    auth_token: str,
    account_id: int,
    disable_schedule_on_trigger: bool = True,
    request_max_retries: int = 3,
    request_retry_delay: float = 0.25,
    dbt_cloud_host: str = DBT_DEFAULT_HOST,
    log: "Optional[logging.Logger]" = None,
    log_requests: bool = False,
):
    """Store dbt Cloud client configuration.

    Args:
        auth_token: dbt Cloud API auth token.
        account_id: dbt Cloud account id.
        disable_schedule_on_trigger: Whether to disable the job schedule
            when a run is triggered.
        request_max_retries: Number of times a failed request is retried.
        request_retry_delay: Seconds to wait between retries.
        dbt_cloud_host: Base host for the dbt Cloud API.
        log: Logger to use; defaults to ``get_dagster_logger()``.
        log_requests: Whether to log each outgoing request.
    """
    self._auth_token = auth_token
    self._account_id = account_id
    self._disable_schedule_on_trigger = disable_schedule_on_trigger
    self._request_max_retries = request_max_retries
    self._request_retry_delay = request_retry_delay
    self._dbt_cloud_host = dbt_cloud_host
    # Resolve the default logger at call time: a call in the parameter
    # default would execute once at import, before any run context exists.
    self._log = log if log is not None else get_dagster_logger()
    self._log_requests = log_requests
def _load_manifest_for_project(
    project_dir: str, profiles_dir: str, target_dir: str, select: str
) -> Tuple[Mapping[str, Any], DbtCliOutput]:
    """Run ``dbt ls`` and return the regenerated manifest plus the CLI output.

    Running "dbt ls" rewrites target/manifest.json, which contains a superset
    of the actual "dbt ls" output.
    """
    flags = {
        "project-dir": project_dir,
        "profiles-dir": profiles_dir,
        "select": select,
        "resource-type": "model",
        "output": "json",
    }
    cli_output = execute_cli(
        executable="dbt",
        command="ls",
        log=get_dagster_logger(),
        flags_dict=flags,
        warn_error=False,
        ignore_handled_error=False,
        target_path=target_dir,
    )
    with open(os.path.join(target_dir, "manifest.json"), "r") as manifest_file:
        return json.load(manifest_file), cli_output
def find_sugariest(cereals):
    """Log the name of the cereal with the most sugar.

    Args:
        cereals: Row dicts from the cereal CSV; ``"sugars"`` holds a
            numeric string.
    """
    # Numeric key: DictReader yields strings, and a lexicographic sort
    # ranks "9" above "10". max() also avoids sorting the whole list.
    sugariest = max(cereals, key=lambda cereal: int(cereal["sugars"]))
    get_dagster_logger().info(
        f'{sugariest["name"]} is the sugariest cereal')
def bad_download_csv():
    """Deliberately type-incorrect variant: downloads the cereal CSV but
    returns a list containing a bare string instead of row dicts."""
    csv_text = requests.get("https://docs.dagster.io/assets/cereal.csv").text
    line_list = csv_text.split("\n")
    get_dagster_logger().info(f"Read {len(line_list)} lines")
    # The wrong return value is intentional (type-checking demo); keep it.
    return ["not_a_dict"]
def display_results(most_calories, most_protein):
    """Log the winning cereal names for calories and for protein."""
    log = get_dagster_logger()
    for message in (
        f"Most caloric cereal: {most_calories}",
        f"Most protein-rich cereal: {most_protein}",
    ):
        log.info(message)
def download_csv():
    """Download the cereal dataset and parse it into a list of row dicts.

    Returns:
        list[dict]: One dict per CSV row, keyed by column name.
    """
    response = requests.get("https://docs.dagster.io/assets/cereal.csv")
    lines = response.text.split("\n")
    get_dagster_logger().info(f"Read {len(lines)} lines")
    # list(...) rather than a copy-comprehension (ruff PERF402).
    return list(csv.DictReader(lines))
def display_results(most_calories: str, most_protein: str) -> Nothing:
    """Example of a Dagster op that takes inputs but does not produce output."""
    log = get_dagster_logger()
    for line in (
        f"Most caloric cereal: {most_calories}",
        f"Most protein-rich cereal: {most_protein}",
    ):
        log.info(line)
def sort_by_calories(cereals):
    """Log the name of the most caloric cereal."""
    ranked = sorted(cereals, key=lambda row: int(row["calories"]))
    winner = ranked[-1]["name"]
    get_dagster_logger().info(f'Most caloric cereal: {winner}')