def create_tables_from_basiskaartdb_to_masterdb(
    source_connection, source_select_statement, target_base_table, *args, **kwargs
):
    """Copy data from a basiskaart source DB into the master DB.

    source_connection: name of the environment variable (as defined in
        docker-compose.yml) holding the source connection string,
        e.g. AIRFLOW_CONN_POSTGRES_BASISKAART.
    source_select_statement: SQL select query to execute on the source DB.
    target_base_table: table in the master DB into which the query result
        is inserted.
    """
    try:
        # Set up the DB source connection (strip any query string from the DSN).
        source_engine = create_engine(env(source_connection).split("?")[0])
    except SQLAlchemyError as e:
        raise Exception(str(e)) from e

    # Fetch data from the source DB in batches of `import_step` records.
    with source_engine.connect() as connection:
        count = 0
        cursor = connection.execute(source_select_statement)
        while True:
            fetch_iterator = cursor.fetchmany(size=import_step)
            batch_count = copy_data_in_batch(target_base_table, fetch_iterator)
            count += batch_count
            if batch_count < import_step:
                break
    logger.info(f"Total records imported: {count}")
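# Both copy functions in this file rely on `import_step` and `copy_data_in_batch`,
# which are defined elsewhere in the repo. A minimal sketch under stated
# assumptions: the `AIRFLOW_CONN_POSTGRES_MASTER` variable name and the
# executemany-style INSERT are guesses, not the actual implementation, and the
# dbwaarnemingen snippet below calls a one-argument variant of this helper.
import_step = 10_000  # batch size; the real value is configured elsewhere

master_engine = create_engine(env("AIRFLOW_CONN_POSTGRES_MASTER").split("?")[0])

def copy_data_in_batch(target_base_table, fetch_iterator):
    """Insert one fetched batch into the target table; return its row count."""
    rows = [tuple(row) for row in fetch_iterator]
    if rows:
        placeholders = ", ".join(["%s"] * len(rows[0]))
        with master_engine.connect() as connection:
            connection.execute(
                f"INSERT INTO {target_base_table} VALUES ({placeholders})",
                rows,
            )
    return len(rows)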
def copy_data_from_dbwaarnemingen_to_masterdb(*args, **kwargs):
    try:
        waarnemingen_engine = create_engine(
            env("AIRFLOW_CONN_POSTGRES_DBWAARNEMINGEN").split("?")[0]
        )
    except SQLAlchemyError as e:
        raise Exception(str(e)) from e

    with waarnemingen_engine.connect() as waarnemingen_connection:
        count = 0
        # Aggregate the 15-minute counts into hourly totals per sensor.
        cursor = waarnemingen_connection.execute(
            """
            SET TIME ZONE 'Europe/Amsterdam';
            WITH cmsa_1h_v6 AS (
                SELECT sensor,
                       date_trunc('hour'::text, timestamp_rounded) AS datum_uur,
                       SUM(total_count) AS aantal_passanten
                FROM cmsa_15min_view_v6_materialized
                WHERE timestamp_rounded > to_date('2019-01-01'::text, 'YYYY-MM-DD'::text)
                GROUP BY sensor, (date_trunc('hour'::text, timestamp_rounded))
            )
            SELECT v.sensor,
                   s.location_name,
                   v.datum_uur,
                   v.aantal_passanten,
                   s.gebied,
                   s.geom AS geometrie
            FROM cmsa_1h_v6 v
            JOIN peoplemeasurement_sensors s ON s.objectnummer::text = v.sensor::text;
            """
        )
        while True:
            fetch_iterator = cursor.fetchmany(size=import_step)
            batch_count = copy_data_in_batch(fetch_iterator)
            count += batch_count
            if batch_count < import_step:
                break
    print(f"Imported: {count}")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--token", type=str, required=True, help="github token")
    args = parser.parse_args()

    repo = env("GITHUB_REPOSITORY")

    milestones = get_latest_milestones(repo)
    milestones.sort(key=lambda milestone: milestone["title"], reverse=True)

    patch_version = get_patch_version()
    milestone_version = f"v{patch_version}"
    current_milestone = next(
        (milestone for milestone in milestones if milestone["title"] == milestone_version),
        None,
    )
    # Do nothing if a milestone with the expected title has already been created.
    if current_milestone is not None:
        return

    # Derive the due date from the previous milestone: two weeks later.
    prev_milestone_version = f"v{patch_version - 1}"
    prev_milestone = next(
        (milestone for milestone in milestones if milestone["title"] == prev_milestone_version),
        None,
    )
    due_on = None
    if prev_milestone is not None and prev_milestone["due_on"] is not None:
        date = datetime.strptime(prev_milestone["due_on"], "%Y-%m-%dT%H:%M:%SZ") + timedelta(weeks=2)
        due_on = "%04d-%02d-%02dT%02d:%02d:%02dZ" % (
            date.year, date.month, date.day, date.hour, date.minute, date.second
        )

    # Rotate the release manager: drop everyone who managed a recent milestone.
    maintainers = ["Undin", "ortem", "mchernyavsky", "vlad20012", "dima74", "avrong", "ozkriff"]
    for m in milestones:
        if len(maintainers) == 1:
            break
        desc = m["description"] or ""  # description may be absent
        res = re.search(RELEASE_MANAGER_RE, desc)
        if res is not None:
            try:
                maintainers.remove(res.group(1))
            except ValueError:
                pass

    description = f"Release manager: @{maintainers[0]}"
    create_milestone(repo, args.token, milestone_version, description=description, due_on=due_on)
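# The milestone script assumes a `RELEASE_MANAGER_RE` that extracts the login
# from the "Release manager: @name" line it writes into each description, and a
# `create_milestone` helper. Sketches under stated assumptions: the regex pattern
# is a guess, and the helper is shown against the real GitHub REST endpoint
# POST /repos/{repo}/milestones (the repo's own helper may differ):
import re
import requests

RELEASE_MANAGER_RE = re.compile(r"Release manager: @([\w-]+)")

def create_milestone(repo, token, title, description=None, due_on=None):
    """Create a milestone via the GitHub REST API."""
    payload = {"title": title}
    if description is not None:
        payload["description"] = description
    if due_on is not None:
        payload["due_on"] = due_on
    response = requests.post(
        f"https://api.github.com/repos/{repo}/milestones",
        json=payload,
        headers={"Authorization": f"token {token}"},
    )
    response.raise_for_status()
    return response.json()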
def test():
    # Fail the test outright if the stall shutdown never fires.
    signal.alarm(45)
    with env(HOROVOD_STALL_CHECK_TIME_SECONDS="2",
             HOROVOD_STALL_SHUTDOWN_TIME_SECONDS="5"):
        hvd.init()
        tensor = torch.IntTensor([[1, 2], [3, 4]])
        if hvd.rank() != 0:
            # Delay the non-zero ranks so the allreduce stalls past the shutdown window.
            time.sleep(10 * hvd.rank())
        try:
            summed = hvd.allreduce(tensor, average=False)
        except Exception:
            # The stalled allreduce is expected to be aborted.
            pass
        finally:
            hvd.shutdown()
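# The `with env(...)` pattern in the Horovod tests suggests a context manager
# that temporarily overrides environment variables; note this is a different
# `env` from the envparse-style reader used in the Airflow snippets. A minimal
# sketch of such a helper (the real test utility may differ):
import os
from contextlib import contextmanager

@contextmanager
def env(**overrides):
    """Temporarily set environment variables, restoring the originals on exit."""
    previous = {key: os.environ.get(key) for key in overrides}
    os.environ.update(overrides)
    try:
        yield
    finally:
        for key, value in previous.items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value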
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--token", type=str, required=True, help="github token")
    args = parser.parse_args()

    repo = env("GITHUB_REPOSITORY")
    nightly_branch = get_branch(repo, args.token, NIGHTLY_BRANCH)
    if nightly_branch is not None:
        print("Repo already has nightly branch")
        return

    git_command("checkout", "-b", NIGHTLY_BRANCH)

    output = execute_command("rustc", "-V")
    match_result = RUSTC_VERSION_RE.match(output)
    date = match_result.group(1)

    with open(CHECK_WORKFLOW_PATH) as f:
        workflow_text = f.read()

    result = re.search(WORKFLOW_RUSTC_VERSION_RE, workflow_text)
    if result is None:
        raise ValueError("Failed to find the current version of nightly rust")

    new_workflow_text = re.sub(WORKFLOW_RUSTC_VERSION_RE, f"\\g<1>{date}\\g<2>", workflow_text)
    if new_workflow_text == workflow_text:
        print("The latest nightly rustc version is already used")
        return

    with open(CHECK_WORKFLOW_PATH, "w") as f:
        f.write(new_workflow_text)

    if has_git_changes():
        git_command("add", CHECK_WORKFLOW_PATH)
        git_command("commit", "-m", ":arrow_up: nightly")
        git_command("push", "origin", NIGHTLY_BRANCH)

        pull_request = create_pull_request(repo, args.token, NIGHTLY_BRANCH, ":arrow_up: nightly")
        add_assignee(repo, args.token, pull_request["number"], DEFAULT_ASSIGNEE)
    else:
        print("Everything is up to date")
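# The script above assumes two regexes defined elsewhere: RUSTC_VERSION_RE, whose
# group(1) captures the build date from `rustc -V` output (which looks like
# "rustc 1.52.0-nightly (abc1234 2021-03-05)"), and WORKFLOW_RUSTC_VERSION_RE,
# whose groups 1 and 2 must bracket the pinned date inside the check workflow so
# the re.sub above can splice a new date in. Plausible sketches only; the actual
# patterns, and the workflow syntax they match, may differ:
RUSTC_VERSION_RE = re.compile(r"rustc \d+\.\d+\.\d+-nightly \([0-9a-f]+ (\d{4}-\d{2}-\d{2})\)")
WORKFLOW_RUSTC_VERSION_RE = re.compile(r"(nightly-)\d{4}-\d{2}-\d{2}(\s)")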
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--token", type=str, required=True, help="github token")
    args = parser.parse_args()

    repo = env("GITHUB_REPOSITORY")
    updater = NightlyUpdater(repo, args.token,
                             branch_name="nightly",
                             message=":arrow_up: nightly",
                             assignee="Undin")
    updater.update()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--token", type=str, required=True, help="github token")
    args = parser.parse_args()

    repo = env("GITHUB_REPOSITORY")
    updater = CompilerFeatureUpdater(repo, args.token,
                                     branch_name="update-compiler-features",
                                     message="Update compiler features",
                                     assignee="Undin")
    updater.update()
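# NightlyUpdater and CompilerFeatureUpdater share the constructor signature and
# the `update()` entry point used above. A plausible shape for their common base,
# assuming it wraps the branch/commit/PR flow spelled out in the nightly script
# earlier in this file (the method names and the abstract hook are assumptions):
class Updater:
    def __init__(self, repo, token, branch_name, message, assignee):
        self.repo = repo
        self.token = token
        self.branch_name = branch_name
        self.message = message
        self.assignee = assignee

    def _update_locally(self):
        """Apply the concrete change to the working tree; implemented by subclasses."""
        raise NotImplementedError

    def update(self):
        if get_branch(self.repo, self.token, self.branch_name) is not None:
            print(f"Repo already has `{self.branch_name}` branch")
            return
        git_command("checkout", "-b", self.branch_name)
        self._update_locally()
        if has_git_changes():
            git_command("add", "-A")
            git_command("commit", "-m", self.message)
            git_command("push", "origin", self.branch_name)
            pull_request = create_pull_request(self.repo, self.token,
                                               self.branch_name, self.message)
            add_assignee(self.repo, self.token, pull_request["number"], self.assignee)
        else:
            print("Everything is up to date")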
def test_timeline(self):
    with tempfile.NamedTemporaryFile() as t:
        with env(HOROVOD_TIMELINE=t.name, HOROVOD_TIMELINE_MARK_CYCLES='1'):
            hvd.init()

            # Perform a simple allreduce operation
            hvd.allreduce(torch.tensor([1, 2, 3], dtype=torch.float32), name='test_allreduce')

            # Wait for it to register in the timeline.
            time.sleep(0.1)

            if hvd.rank() == 0:
                with open(t.name, 'r') as tf:
                    timeline_text = tf.read()
                    assert 'allreduce.test_allreduce' in timeline_text, timeline_text
                    assert 'NEGOTIATE_ALLREDUCE' in timeline_text, timeline_text
                    assert 'ALLREDUCE' in timeline_text, timeline_text
                    assert 'CYCLE_START' in timeline_text, timeline_text
def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--token", type=str, required=True)
    parser.add_argument("--repo_owner", type=str, required=True)
    parser.add_argument("--repo_name", type=str, required=True)
    args = parser.parse_args()

    repo = env("GITHUB_REPOSITORY")

    # The script is supposed to be invoked only after release branch creation,
    # so we need the previous patch version.
    release_patch_version = get_patch_version() - 1

    changelog_repo = f"{args.repo_owner}/{args.repo_name}"
    branch_name = changelog_branch_name(release_patch_version)
    branches = get_all_branches(changelog_repo, args.token)
    existing_branch = next(
        (branch["name"] for branch in branches if branch["name"].endswith(branch_name)),
        None,
    )
    if existing_branch is not None:
        print(f"Branch for v{release_patch_version} release already exists: `{existing_branch}`")
        return

    milestone = get_current_milestone(repo, release_patch_version)

    # TODO: find a more robust way to parse the date
    release_date = datetime.strptime(milestone["due_on"], "%Y-%m-%dT%H:%M:%SZ").date()
    today = date.today()
    if today >= release_date or milestone["state"] == "closed":
        print(f"Milestone v{release_patch_version} is over")
        return

    delta = release_date - today
    five_days = timedelta(5)
    if delta > five_days:
        print("Too early to create release changelog")
        return

    add_changelog_template(args.token, release_patch_version, args.repo_name)
    create_changelog_pull_request(changelog_repo, args.token, release_patch_version, milestone)
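# The changelog script relies on `changelog_branch_name` and `get_all_branches`,
# defined elsewhere. Sketches under stated assumptions: the branch-name format
# below is hypothetical (the script only matches it via endswith()), and the
# branch listing is shown against the real GitHub endpoint
# GET /repos/{repo}/branches (the repo's own helper may paginate):
import requests

def changelog_branch_name(patch_version):
    return f"changelog-v{patch_version}"  # hypothetical format

def get_all_branches(repo, token):
    response = requests.get(
        f"https://api.github.com/repos/{repo}/branches",
        headers={"Authorization": f"token {token}"},
    )
    response.raise_for_status()
    return response.json()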
import pathlib
from dataclasses import dataclass

from postgres_table_init_operator import PostgresTableInitOperator
from postgres_table_copy_operator import PostgresTableCopyOperator
from postgres_permissions_operator import PostgresPermissionsOperator
from http_gob_operator import HttpGobOperator
from common import (
    default_args,
    DATAPUNT_ENVIRONMENT,
    MessageOperator,
    slack_webhook_token,
    env,
)
from schematools import TMP_TABLE_POSTFIX
from schematools.utils import schema_def_from_url

MAX_RECORDS = 1000 if DATAPUNT_ENVIRONMENT == "development" else None
GOB_PUBLIC_ENDPOINT = env("GOB_PUBLIC_ENDPOINT")
GOB_SECURE_ENDPOINT = env("GOB_SECURE_ENDPOINT")
OAUTH_TOKEN_EXPIRES_MARGIN = env.int("OAUTH_TOKEN_EXPIRES_MARGIN", 5)
SCHEMA_URL = env("SCHEMA_URL")

dag_id = "gob"
owner = "gob"

graphql_path = pathlib.Path(__file__).resolve().parents[0] / "graphql"


@dataclass
class DatasetInfo:
    """Dataclass to provide canned information about the dataset for other
    operators to work with."""
import argparse
from urllib.request import urlopen

from common import env, get_patch_version_from_text
from github import get_current_milestone, set_milestone

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--token", type=str, help="GitHub token", required=True)
    parser.add_argument("--pull-request", type=int, help="Pull request number", required=True)
    args = parser.parse_args()

    repo = env("GITHUB_REPOSITORY")
    text = urlopen(f"https://github.com/{repo}/raw/master/gradle.properties").read().decode("utf-8")
    patch_version = get_patch_version_from_text(text)

    milestone = get_current_milestone(repo, patch_version)
    set_milestone(args.token, repo, args.pull_request, milestone["number"])
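# `get_patch_version_from_text` extracts the patch version from the raw
# gradle.properties contents fetched above. A minimal sketch, assuming the file
# carries a `patchVersion=N` property (the property name is an assumption):
import re

def get_patch_version_from_text(text):
    match = re.search(r"patchVersion\s*=\s*(\d+)", text)
    if match is None:
        raise ValueError("Failed to find patch version in gradle.properties")
    return int(match.group(1))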
import pathlib

from dynamic_dagrun_operator import TriggerDynamicDagRunOperator
from sqlalchemy_create_object_operator import SqlAlchemyCreateObjectOperator
from postgres_table_init_operator import PostgresTableInitOperator
from postgres_table_copy_operator import PostgresTableCopyOperator
from http_gob_operator import HttpGobOperator
from common import (
    default_args,
    DATAPUNT_ENVIRONMENT,
    MessageOperator,
    slack_webhook_token,
    env,
)
from schematools import TMP_TABLE_POSTFIX

MAX_RECORDS = 1000 if DATAPUNT_ENVIRONMENT == "development" else None
GOB_PUBLIC_ENDPOINT = env("GOB_PUBLIC_ENDPOINT")
GOB_SECURE_ENDPOINT = env("GOB_SECURE_ENDPOINT")
OAUTH_TOKEN_EXPIRES_MARGIN = env.int("OAUTH_TOKEN_EXPIRES_MARGIN", 5)

dag_id = "gob"
owner = "gob"

graphql_path = pathlib.Path(__file__).resolve().parents[0] / "graphql"


def create_gob_dag(is_first, gob_dataset_name, gob_table_name):
    gob_db_table_name = f"{gob_dataset_name}_{gob_table_name}"
    graphql_dir_path = graphql_path / f"{gob_dataset_name}-{gob_table_name}"
    graphql_params_path = graphql_dir_path / "args.json"
    extra_kwargs = {}
from sql.wior import (
    DROP_COLS,
    SQL_DROP_TMP_TABLE,
    SQL_GEOM_VALIDATION,
    SQL_ADD_PK,
    SQL_SET_DATE_DATA_TYPES,
)

dag_id: str = "wior"
variables: Dict = Variable.get(dag_id, deserialize_json=True)
data_endpoint: Dict = variables["data_endpoints"]["wfs"]
tmp_dir: str = f"{SHARED_DIR}/{dag_id}"
data_file: str = f"{tmp_dir}/{dag_id}.geojson"
db_conn: DatabaseEngine = DatabaseEngine()
password: str = env("AIRFLOW_CONN_WIOR_PASSWD")
user: str = env("AIRFLOW_CONN_WIOR_USER")
base_url: str = URL(env("AIRFLOW_CONN_WIOR_BASE_URL"))
total_checks: list = []
count_checks: list = []
geo_checks: list = []
to_zone: Optional[tzinfo] = tz.gettz("Europe/Amsterdam")


class DataSourceError(Exception):
    """Custom exception for an unavailable data source."""


# data connection
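# The dangling `# data connection` comment suggests the module goes on to download
# the WIOR GeoJSON into `data_file`. A minimal sketch of that step, assuming HTTP
# basic auth and that `data_endpoint` holds WFS query parameters (both the auth
# scheme and the parameter handling are assumptions, not the actual DAG task):
import requests

def fetch_wior_data() -> None:
    response = requests.get(
        str(base_url),
        params=data_endpoint,
        auth=(user, password),
        timeout=60,
    )
    if not response.ok:
        raise DataSourceError(f"WIOR source returned HTTP {response.status_code}")
    with open(data_file, "w") as f:
        f.write(response.text)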