Exemplo n.º 1
0
def flow(parameters, *_):
    """Pull origin/master of the COVID19-ISRAEL checkout and emit its HEAD sha1.

    The checkout is expected at ``../COVID19-ISRAEL`` relative to the current
    working directory. The git identity is configured first (the return codes
    of those two calls are not checked), then ``git pull origin master`` runs.

    Args:
        parameters: mapping of flow parameters; the optional ``dump_to_path``
            key overrides the output directory
            (default ``data/github_pull_covid19_israel``).
        *_: extra positional arguments, ignored.

    Returns:
        A ``Flow`` with a single row ``{'sha1': <HEAD commit>}`` that is
        printed and dumped to the output directory.

    Raises:
        Exception: if ``git pull`` exits with a non-zero status.
        subprocess.CalledProcessError: if ``git rev-parse HEAD`` fails.
    """
    repo_dir = '../COVID19-ISRAEL'
    logging.info('Pulling latest code from COVID19-ISRAEL github repo')
    repository = os.environ.get('COVID19_ISRAEL_REPOSITORY')
    logging.info('COVID19_ISRAEL_REPOSITORY=%s' % repository)
    logging.info('pulling from origin/master')

    # Set the committer identity for the checkout before pulling.
    git_identity = (
        ('user.email', 'avid-covider-pipelines@localhost'),
        ('user.name', 'avid-covider-pipelines'),
    )
    for config_key, config_value in git_identity:
        utils.subprocess_call_log(['git', 'config', config_key, config_value],
                                  cwd=repo_dir)

    pull_status = utils.subprocess_call_log(['git', 'pull', 'origin', 'master'],
                                            cwd=repo_dir)
    if pull_status != 0:
        raise Exception('Failed to git pull')

    head_output = subprocess.check_output(['git', 'rev-parse', 'HEAD'],
                                          cwd=repo_dir)
    sha1 = head_output.decode().strip()

    rows = iter([{'sha1': sha1}])
    resource_step = update_resource(-1,
                                    name='github_pull_covid19_israel',
                                    path='github_pull_covid19_israel.csv',
                                    **{'dpp:streaming': True})
    output_dir = parameters.get('dump_to_path',
                                'data/github_pull_covid19_israel')
    return Flow(rows, resource_step, printer(), dump_to_path(output_dir))
Exemplo n.º 2
0
def flow(parameters, *_):
    """Run a COVID19-ISRAEL module and build a flow from ``get_updated_files``.

    Before the module runs, the modification time, size and hash of every
    regular file under ``../COVID19-ISRAEL`` are recorded. After a successful
    run those three snapshots are handed to ``get_updated_files``, whose rows
    feed the returned flow.

    Args:
        parameters: mapping of flow parameters:
            ``module`` (required) - module name passed to ``python -u -m``;
            ``log_file`` - forwarded to ``utils.subprocess_call_log``;
            ``resource_name`` - output resource name
            (default ``covid19_israel_updated_files``);
            ``dump_to_path`` - output directory; a falsy value disables the
            dump step (default is derived from the module name);
            ``printer_num_rows`` - rows to print; the printer step is
            skipped unless the value is greater than 0 (default 999).
        *_: extra positional arguments, ignored.

    Returns:
        A ``Flow`` over the rows produced by ``get_updated_files``.

    Raises:
        Exception: if the module exits with a non-zero status.
    """
    module = parameters['module']
    logging.info('Running COVID19-ISRAEL module %s' % module)

    # Snapshot of the repository state before the module runs.
    mtimes, sizes, hashes = {}, {}, {}
    for path in glob('../COVID19-ISRAEL/**', recursive=True):
        if not os.path.isfile(path):
            continue
        mtimes[path] = os.path.getmtime(path)
        sizes[path] = os.path.getsize(path)
        hashes[path] = get_hash(path)

    exit_code = utils.subprocess_call_log(['python', '-u', '-m', module],
                                          log_file=parameters.get('log_file'),
                                          cwd='../COVID19-ISRAEL')
    if exit_code != 0:
        raise Exception('Failed to run COVID19-ISRAEL module %s' % module)

    resource_name = parameters.get('resource_name',
                                   'covid19_israel_updated_files')
    default_dump_dir = 'data/run_covid19_israel/last_updated_files/%s' % module
    dump_to_path_name = parameters.get('dump_to_path', default_dump_dir)
    printer_num_rows = parameters.get('printer_num_rows', 999)

    steps = [
        get_updated_files(mtimes, sizes, hashes),
        update_resource(-1,
                        name=resource_name,
                        path='%s.csv' % resource_name,
                        **{'dpp:streaming': True}),
    ]
    # The printer and dump steps are both optional.
    if printer_num_rows > 0:
        steps.append(printer(num_rows=printer_num_rows))
    if dump_to_path_name:
        steps.append(dump_to_path(dump_to_path_name))
    return Flow(*steps)
def run_covid19_israel(parameters, run_row):
    """Run a COVID19-ISRAEL module and record the outcome in ``run_row``.

    The module is executed with ``python -u -m <module> <args...>`` from the
    ``../COVID19-ISRAEL`` directory, logging to a per-run file under
    ``<output-dir>/log_files``. If the run succeeds and
    ``external_sharing_packages`` is configured, those packages are published;
    a publishing failure is logged (to the run log file and via ``logging``)
    and marks the run as failed instead of propagating.

    Args:
        parameters: mapping with the required keys ``module`` and
            ``output-dir`` and the optional keys ``args`` (list of extra
            command-line arguments) and ``external_sharing_packages``.
        run_row: mutable mapping describing this run. ``start_time`` must be
            a value with ``strftime`` (it names the log file). The keys
            ``github_sha1`` and ``error`` (``'yes'`` / ``'no'``) are set here.

    Returns:
        None. Results are reported by mutating ``run_row``.
    """
    # The sha1 constant is looked up in this module's globals; '_' is the
    # placeholder when it is not defined.
    run_row['github_sha1'] = globals().get('COVID19_ISRAEL_GITHUB_SHA1', '_')
    args = parameters.get('args') or []
    cmd = ['python', '-u', '-m', parameters['module'], *args]

    log_files_dir = os.path.join(parameters['output-dir'], 'log_files')
    os.makedirs(log_files_dir, exist_ok=True)
    log_filename = os.path.join(
        log_files_dir,
        '%s.log' % run_row['start_time'].strftime('%Y%m%dT%H%M%S'))

    if utils.subprocess_call_log(
            cmd, log_file=log_filename, cwd='../COVID19-ISRAEL') != 0:
        run_row['error'] = 'yes'
        logging.error('Failed to run COVID19-ISRAEL module %s with args %s' %
                      (parameters['module'], args))
        return

    run_row['error'] = 'no'
    external_sharing_packages = parameters.get("external_sharing_packages")
    if not external_sharing_packages:
        return

    try:
        publish_external_sharing_packages.flow({
            "packages": external_sharing_packages
        }).process()
    except Exception:
        # Top-level boundary: a publishing failure must mark the run as
        # failed rather than abort the caller.
        errmsg = "Failed to export external sharing packages for module %s with args %s" % (
            parameters["module"], args)
        with open(log_filename, "a") as f:
            # Terminate the message line so the traceback starts on its own
            # line instead of being glued to the end of the message.
            f.write(errmsg + "\n")
            traceback.print_exc(file=f)
        logging.exception(errmsg)
        run_row['error'] = 'yes'
Exemplo n.º 4
0
 def _process_packages():
     """Publish each configured package's resources to its GitHub targets.

     For every entry in ``parameters["packages"]`` the package descriptor
     JSON is loaded from ``../COVID19-ISRAEL/<package_path>``. For each of
     the package's ``publish_targets`` the target repository is shallow-cloned
     over SSH with the configured deploy key, resource files whose hash
     differs from the copy in the clone are copied in and staged, and the
     result is committed and pushed when at least one file was staged.

     Yields:
         One dict per package with the ``name``, ``datetime`` and ``hash``
         values taken from the package descriptor.

     Raises:
         AssertionError: if a publish target lacks required keys or a git
             command exits with a non-zero status.
         KeyError: if the ``DEPLOY_KEY_FILE_<deploy_key>`` environment
             variable is not set, or a referenced resource is missing from
             the package descriptor.
     """

     def _run_checked(cmd, **kwargs):
         # Explicit check instead of `assert call(...) == 0`: assert
         # statements are removed under `python -O`, which would silently
         # skip the git command itself. AssertionError is kept so existing
         # callers see the same exception type.
         if subprocess_call_log(cmd, **kwargs) != 0:
             raise AssertionError("Command failed: %s" % (cmd,))

     for package in parameters.get("packages", []):
         descriptor_path = os.path.join("..", "COVID19-ISRAEL",
                                        package["package_path"])
         with open(descriptor_path) as f:
             package_descriptor = json.load(f)
         resources = {
             resource["name"]: resource
             for resource in package_descriptor["resources"]
         }
         for publish_target in package["publish_targets"]:
             has_required_keys = (
                 "github_repo" in publish_target
                 and "deploy_key" in publish_target
                 and ("files" in publish_target
                      or "files_foreach" in publish_target))
             if not has_required_keys:
                 raise AssertionError(
                     "publish target must define github_repo, deploy_key and "
                     "files or files_foreach, got keys: %s" %
                     (list(publish_target),))
             with tempfile.TemporaryDirectory() as tmpdir:
                 # Work on a private read-only copy of the deploy key
                 # (mode 0o400) that is removed together with tmpdir.
                 source_deploy_key_file = os.environ[
                     "DEPLOY_KEY_FILE_" + publish_target["deploy_key"]]
                 deploy_key_file = os.path.join(tmpdir, "deploy_key")
                 shutil.copyfile(source_deploy_key_file, deploy_key_file)
                 os.chmod(deploy_key_file, 0o400)
                 # NOTE(review): host key checking is disabled for this SSH
                 # command - confirm this is intentional.
                 gitenv = {
                     **os.environ, "GIT_SSH_COMMAND":
                     "ssh -i %s -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentitiesOnly=yes"
                     % deploy_key_file
                 }
                 branch = publish_target.get("branch", "master")
                 repodir = os.path.join(tmpdir, "repo")
                 # SSH remote for GitHub; the literal had been mangled into
                 # an e-mail-obfuscation placeholder, which made every clone
                 # fail.
                 _run_checked([
                     "git", "clone", "--depth", "1", "--branch", branch,
                     "git@github.com:%s.git" % publish_target["github_repo"],
                     repodir
                 ], env=gitenv)
                 _run_checked(
                     ["git", "config", "user.name", "avid-covider-pipelines"],
                     cwd=repodir)
                 _run_checked([
                     "git", "config", "user.email",
                     "avid-covider-pipelines@localhost"
                 ], cwd=repodir)
                 num_added = 0
                 # Mapping of resource name -> target path template. Start
                 # from the explicit "files" and add one entry per value of
                 # each descriptor list named in "files_foreach".
                 files = {**publish_target.get("files", {})}
                 for metadata_list_key, files_foreach in publish_target.get(
                         "files_foreach", {}).items():
                     for resource_name_template, target_path_template in files_foreach.items():
                         for foreach_value in package_descriptor.get(
                                 metadata_list_key, []):
                             resource_name = resource_name_template.format(
                                 foreach_value=foreach_value,
                                 **package_descriptor)
                             target_path = target_path_template.format(
                                 foreach_value=foreach_value,
                                 **package_descriptor)
                             files[resource_name] = target_path
                 for resource_name, target_path_template in files.items():
                     target_path = target_path_template.format(
                         **package_descriptor)
                     target_fullpath = os.path.join(repodir, target_path)
                     resource = resources[resource_name]
                     if os.path.exists(target_fullpath) and get_hash(
                             target_fullpath) == resource["hash"]:
                         logging.info("File is not changed: %s" %
                                      resource["path"])
                         continue
                     source_path = os.path.join("..", "COVID19-ISRAEL",
                                                resource["path"])
                     logging.info(
                         "%s: %s --> %s" %
                         (resource_name, source_path, target_fullpath))
                     # The target may live in a directory that does not exist
                     # in the clone yet; copyfile does not create parents.
                     os.makedirs(os.path.dirname(target_fullpath),
                                 exist_ok=True)
                     shutil.copyfile(source_path, target_fullpath)
                     _run_checked(["git", "add", target_path], cwd=repodir)
                     num_added += 1
                 if num_added > 0:
                     logging.info("Committing %s changes" % num_added)
                     _run_checked([
                         "git", "commit", "-m",
                         "automated update from hasadna/avid-covider-pipelines"
                     ], cwd=repodir)
                     _run_checked(["git", "push", "origin", branch],
                                  cwd=repodir,
                                  env=gitenv)
                 else:
                     logging.info("No changes to commit")
         yield {
             "name": package_descriptor["name"],
             "datetime": package_descriptor["datetime"],
             "hash": package_descriptor["hash"]
         }
Exemplo n.º 5
0
def flow(parameters, *_):
    """Update the COVID19-ISRAEL checkout and emit the resulting sha1.

    When the ``COVID19_ISRAEL_REPOSITORY`` environment variable is empty the
    pull is skipped and the sha1 is taken from ``COVID19_ISRAEL_SHA1``
    (default ``"_"``). Otherwise the git identity is configured in
    ``../COVID19-ISRAEL`` and either the branch named by
    ``COVID19_ISRAEL_BRANCH`` is fetched, checked out and pulled, or, when
    no branch is set, ``origin/master`` is pulled; the sha1 is then read from
    ``git rev-parse HEAD``.

    Args:
        parameters: mapping of flow parameters:
            ``change-run-covid`` - when truthy, the first line of
            ``avid_covider_pipelines/run_covid19_israel.py`` is set to a
            ``COVID19_ISRAEL_GITHUB_SHA1 = "<sha1>"`` assignment (replacing
            an existing one, or inserted before the current first line);
            ``dump_to_path`` - output directory
            (default ``data/github_pull_covid19_israel``).
        *_: extra positional arguments, ignored.

    Returns:
        A ``Flow`` with a single row ``{'sha1': <sha1>}`` that is printed
        and dumped to the output directory.

    Raises:
        Exception: if a git fetch, checkout or pull exits non-zero.
        subprocess.CalledProcessError: if ``git rev-parse HEAD`` fails.
    """
    repo_dir = '../COVID19-ISRAEL'
    logging.info('Pulling latest code from COVID19-ISRAEL github repo')
    logging.info('COVID19_ISRAEL_REPOSITORY=%s' %
                 os.environ.get('COVID19_ISRAEL_REPOSITORY'))
    logging.info('COVID19_ISRAEL_BRANCH=%s' %
                 os.environ.get('COVID19_ISRAEL_BRANCH'))
    if not os.environ.get('COVID19_ISRAEL_REPOSITORY'):
        logging.info(
            'skipping pull because COVID19_ISRAEL_REPOSITORY env var is empty')
        logging.info('using env var COVID19_ISRAEL_SHA1 for the sha1')
        sha1 = os.environ.get('COVID19_ISRAEL_SHA1', "_")
        logging.info('COVID19_ISRAEL_SHA1=' + sha1)
    else:
        # The return codes of the two identity settings are not checked.
        utils.subprocess_call_log(
            ['git', 'config', 'user.email', 'avid-covider-pipelines@localhost'],
            cwd=repo_dir)
        utils.subprocess_call_log(
            ['git', 'config', 'user.name', 'avid-covider-pipelines'],
            cwd=repo_dir)
        branch = os.environ.get('COVID19_ISRAEL_BRANCH')
        if branch:
            logging.info('Pulling from origin/' + branch)
            git_steps = [
                (['git', 'fetch', 'origin'], 'Failed to fetch origin'),
                (['git', 'checkout', branch], 'Failed to switch branch'),
                (['git', 'pull', 'origin', branch], 'Failed to git pull'),
            ]
        else:
            logging.info('pulling from origin/master')
            git_steps = [
                (['git', 'pull', 'origin', 'master'], 'Failed to git pull'),
            ]
        # Run the steps in order and stop at the first failure.
        for cmd, failure_message in git_steps:
            if utils.subprocess_call_log(cmd, cwd=repo_dir) != 0:
                raise Exception(failure_message)
        sha1 = subprocess.check_output(['git', 'rev-parse', 'HEAD'],
                                       cwd=repo_dir).decode().strip()
    if parameters.get('change-run-covid'):
        script_path = 'avid_covider_pipelines/run_covid19_israel.py'
        sha1_line = 'COVID19_ISRAEL_GITHUB_SHA1 = "%s"\n' % sha1
        with open(script_path, 'r') as f:
            lines = f.readlines()
        # Replace an existing assignment on the first line, otherwise insert
        # one. Checking `lines` first also covers an empty file, which
        # previously was left without the constant.
        if lines and lines[0].startswith('COVID19_ISRAEL_GITHUB_SHA1 = '):
            lines[0] = sha1_line
        else:
            lines.insert(0, sha1_line)
        with open(script_path, 'w') as f:
            f.writelines(lines)
    return Flow(
        iter([{
            'sha1': sha1
        }]),
        update_resource(-1,
                        name='github_pull_covid19_israel',
                        path='github_pull_covid19_israel.csv',
                        **{'dpp:streaming': True}), printer(),
        dump_to_path(
            parameters.get('dump_to_path', 'data/github_pull_covid19_israel')))