def prune(logger, feed, ed_path, stage):
    chlogger = logger.getChild(__name__)
    p = pre_prune(logger, feed, ed_path, stage)
    ext = STAGE_DIRS[stage]
    ending = ".%s" % ext
    try:
        files = filesystem.glob_dir(p, ending)
        count = 0
        for f in files:
            os.remove(os.path.join(p, f))
            count += 1
        log.debug(
            chlogger, {
                "name": __name__,
                "method": "prune",
                "path": ed_path,
                "feed": feed,
                "target_dir": p,
                "ending": ending,
                "removed": count,
                "message": "pruned target_dir",
            })
    except Exception as e:
        log.critical(
            chlogger, {
                "name": __name__,
                "method": "prune",
                "path": ed_path,
                "feed": feed,
                "target_dir": p,
                "ending": ending,
                "ERROR": "failed to prune target_dir",
                "exception": str(e)
            })
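
# Usage sketch (hypothetical feed name; STAGE_DIRS is assumed to map a stage
# to the file extension of its artifacts, e.g. "zip" -> files ending ".zip"):
#
#     prune(logging.getLogger("edl"), "caiso-oasis-atl", "/path/to/ed", "zip")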


def restore_locally(logger, feed, ed_path, archive):
    chlogger = logger.getChild(__name__)
    feed_dir = os.path.join(ed_path, 'data', feed)
    if os.path.exists(feed_dir):
        log.critical(
            chlogger, {
                "name": __name__,
                "method": "restore_locally",
                "path": ed_path,
                "feed": feed,
                "archive": archive,
                "feed_dir": feed_dir,
                "ERROR": "Must delete the feed_dir before restoring."
            })
        return
    try:
        # use a context manager so the archive is closed even on failure
        with tarfile.open(archive) as tf:
            tf.extractall(feed_dir)
        return feed_dir
    except Exception as e:
        log.critical(
            chlogger, {
                "name": __name__,
                "method": "restore_locally",
                "path": ed_path,
                "feed": feed,
                "archive": archive,
                "feed_dir": feed_dir,
                "ERROR": "Failed to restore archive to feed_dir",
                "exception": str(e)
            })


def archive_locally(logger, feed, ed_path, archivedir):
    chlogger = logger.getChild(__name__)
    try:
        archivedir1 = os.path.expanduser(archivedir)
        if archivedir1.startswith("/"):
            archivedir2 = archivedir1
        else:
            archivedir2 = os.path.join(ed_path, archivedir1)

        archive_name = os.path.join(archivedir2, feed)
        root_dir = os.path.expanduser(os.path.join(ed_path, 'data', feed))
        log.debug(
            chlogger, {
                "name": __name__,
                "method": "archive_locally",
                "path": ed_path,
                "feed": feed,
                "target_dir": archivedir2,
                "archive_name": archive_name,
                "root_dir": root_dir
            })
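        # shutil's make_archive appends the format suffix itself, so the
        # returned path looks like "<archivedir2>/<feed>.tar.gz"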
        return make_archive(archive_name, 'gztar', root_dir)
    except Exception as e:
        log.critical(
            chlogger, {
                "name": __name__,
                "method": "archive_locally",
                "path": ed_path,
                "feed": feed,
                "target_dir": archivedir2,
                "archive_name": archive_name,
                "root_dir": root_dir,
                "ERROR": "make archive failed",
                "exception": str(e)
            })


def status(logger, feed, ed_path, separator, header):
    chlogger = logger.getChild(__name__)
    target_dir = os.path.join(ed_path, 'data', feed)
    if not os.path.exists(target_dir):
        log.critical(
            chlogger, {
                "name": __name__,
                "method": "status",
                "path": ed_path,
                "feed": feed,
                "separator": separator,
                "header": header,
                "target_dir": target_dir,
                "ERROR": "target_dir does not exist"
            })
    if header:
        yield separator.join([
            "feed name", "downloaded", "unzipped", "parsed", "inserted",
            "databases"
        ])
    txtfiles = [
        "zip/state.txt", "xml/state.txt", "sql/state.txt", "db/state.txt",
        "save/state.txt"
    ]
    counts = [
        str(lines_in_file(os.path.join(target_dir, f))) for f in txtfiles
    ]
    row = [feed]
    row.extend(counts)
    yield separator.join(row)
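
# Example status output with header=True and separator="," (hypothetical feed
# name and counts; each count is the number of lines in that stage's
# state.txt):
#
#     feed name,downloaded,unzipped,parsed,inserted,databases
#     caiso-oasis-atl,1200,1200,1198,1198,1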


def cli(ctx, ed_dir, log_level):
    """
    Command Line Interface for the Energy Dashboard. This tooling 
    collects information from a number of data feeds, imports that data, 
    transforms it, and inserts it into a database.
    """
    # pass this logger as a child logger to the edl methods
    log.configure_logging()
    logger = logging.getLogger(__name__)
    logger.setLevel(log_level)
    log.debug(
        logger, {
            "name": __name__,
            "method": "cli",
            "ed_dir": ed_dir,
            "log_level": "%s" % log_level
        })

    if ed_dir is None:
        ed_dir = os.path.curdir
    else:
        if not os.path.exists(ed_dir):
            log.critical(
                logger, {
                    "name": __name__,
                    "method": "cli",
                    "ed_dir": ed_dir,
                    "CRITICAL": "ed_dir does not exist"
                })
    eddir = os.path.abspath(os.path.expanduser(ed_dir))
    ctx.obj = {LOGGER: logger, EDDIR: eddir}


def restore_from_s3(logger, feed, ed_path, service):
    """
    Restore a feed's artifacts from an S3 bucket.

    It'd be easy if we could simply 'rclone' from the S3 service and
    have the entire bucket replicated here. I've not had any luck with
    that approach.

    Here's the brute force solution: use the state files,
    '[xml|sql|db|save]/state.txt', to direct the download operations.
    """
    chlogger = logger.getChild(__name__)
    url_tuples = s3_artifact_urls(chlogger, feed, ed_path, service)
    try:
        for (url, target) in url_tuples:
            r = requests.get(url)
            if r.status_code == 200:
                with open(target, 'wb') as fd:
                    for chunk in r.iter_content(chunk_size=128):
                        fd.write(chunk)
                # gzipped artifacts are decompressed in place once downloaded
                if os.path.splitext(target)[1] == ".gz":
                    subprocess.run(["pigz", "-d", target])
                log.info(
                    chlogger, {
                        "name": __name__,
                        "method": "restore_from_s3",
                        "feed": feed,
                        "path": ed_path,
                        "service": service,
                        "url": url,
                        "target": target,
                        "message": "Restore succeeded",
                    })
                # yield only the urls that were actually downloaded
                yield url
            else:
                log.error(
                    chlogger, {
                        "name": __name__,
                        "method": "restore_from_s3",
                        "feed": feed,
                        "path": ed_path,
                        "service": service,
                        "url": url,
                        "target": target,
                        "ERROR": "Failed to retrieve artifact from S3",
                    })
    except Exception as e:
        log.critical(
            chlogger, {
                "name": __name__,
                "method": "restore_from_s3",
                "feed": feed,
                "path": ed_path,
                "service": service,
                "ERROR": "Failed to restore from S3",
                "exception": str(e)
            })


def reset(logger, feed, ed_path, stage):
    chlogger = logger.getChild(__name__)
    p = pre_reset(logger, feed, ed_path, stage)
    try:
        if os.path.exists(p):
            shutil.rmtree(p)
            log.debug(
                chlogger, {
                    "name": __name__,
                    "method": "reset",
                    "path": ed_path,
                    "feed": feed,
                    "target_dir": p,
                    "message": "removed target_dir",
                })
    except Exception as e:
        log.critical(
            chlogger, {
                "name": __name__,
                "method": "reset",
                "path": ed_path,
                "feed": feed,
                "target_dir": p,
                "ERROR": "failed to remove target_dir",
                "exception": str(e)
            })
    try:
        if not os.path.exists(p):
            os.makedirs(p)
            log.debug(
                chlogger, {
                    "name": __name__,
                    "method": "reset",
                    "path": ed_path,
                    "feed": feed,
                    "target_dir": p,
                    "message": "makedirs target_dir",
                })
    except Exception as e:
        log.critical(
            chlogger, {
                "name": __name__,
                "method": "reset",
                "path": ed_path,
                "feed": feed,
                "target_dir": p,
                "ERROR": "failed to makedirs target_dir",
                "exception": str(e)
            })
    return p


def get_latest_xml_file(logger, feed, ed_path, xml_dir):
    """
    Return the latest xml file (sorted by name, which includes a timestamp).
    This may not actually be the latest file due to vagaries in the names.

    If that is ever a problem, stat the files and use the last-modified
    times instead, as sketched below.
    """
    xml_files = sorted(list(fs.glob_dir(xml_dir, ".xml")))
    if len(xml_files) < 1:
        log.critical(
            logger, {
                "name": __name__,
                "method": "get_latest_xml_file",
                "feed": feed,
                "path": ed_path,
                "xml_dir": xml_dir,
                "ERROR": "Failed to parse xml_file",
            })
        return
    return xml_files[-1]


def scanner(logger, feed, ed_path, xmlfile):
    chlogger = logger.getChild(__name__)
    try:
        feed_dir = os.path.join(ed_path, 'data', feed)
        xml_dir = os.path.join(feed_dir, 'xml')
        manifest = os.path.join(feed_dir, 'manifest.json')
        with open(manifest, 'r') as f:
            obj = json.loads(f.read())
        if not xmlfile.startswith(xml_dir):
            xmlfile = os.path.join(xml_dir, xmlfile)
        log.debug(
            chlogger, {
                "name": __name__,
                "method": "scanner",
                "feed": feed,
                "path": ed_path,
                "feed_dir": feed_dir,
                "xml_dir": xml_dir,
                "xml_file": xml_file,
            })
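        # the transformer exposes a chained API: parse the XML document, then
        # scan it for the inferred column types and table definitions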
        with open(xmlfile, 'r') as f:
            return xmlparser.XML2SQLTransormer(
                f).parse().scan_types().scan_tables()
    except Exception as e:
        log.critical(
            chlogger, {
                "name": __name__,
                "method": "scanner",
                "feed": feed,
                "path": ed_path,
                "feed_dir": feed_dir,
                "xml_dir": xml_dir,
                "xml_file": xml_file,
                "ERROR": "Failed to parse and scan xml_file",
                "exception": str(e)
            })


def process_all_stages(logger, feed, ed_path):
    chlogger = logger.getChild(__name__)
    found_src_files = src_files(logger, feed, ed_path)
    if len(found_src_files) < 1:
        log.critical(
            chlogger, {
                "name": __name__,
                "method": "process_all_stages",
                "path": ed_path,
                "feed": feed,
                "src_files": found_src_files,
                "ERROR": "No files found, nothing to process"
            })
        return
    log.debug(
        chlogger, {
            "name": __name__,
            "method": "process_all_stages",
            "path": ed_path,
            "feed": feed,
            "src_files": found_src_files
        })
    for src_file in found_src_files:
        yield process_file(logger, feed, ed_path, src_file)
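
# process_all_stages is a generator, so nothing runs until it is consumed:
#     for result in process_all_stages(logger, feed, ed_path):
#         pass  # each result comes from process_file (defined elsewhere)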


def invoke(logger, feed, ed_path, command):
    chlogger = logger.getChild(__name__)
    target_dir = os.path.join(ed_path, 'data', feed)
    log.debug(
        chlogger, {
            "name": __name__,
            "method": "invoke",
            "path": ed_path,
            "feed": feed,
            "command": command
        })
    if not os.path.exists(target_dir):
        log.critical(
            chlogger, {
                "name": __name__,
                "method": "invoke",
                "path": ed_path,
                "feed": feed,
                "command": command,
                "target_dir": target_dir,
                "ERROR": "target_dir does not exist"
            })
    else:
        return runyield([command], target_dir)
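
# runyield (defined elsewhere) appears to run the command from target_dir and
# yield its output, so callers should iterate the value returned by invoke.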


def create(logger, ed_path, feed, maintainer, company, email, url, start_date,
           delay):
    """
    start_date : list of integers, e.g. [2019, 9, 1]
    """
    chlogger = logger.getChild(__name__)
    new_feed_dir = os.path.join(ed_path, 'data', feed)
    try:
        os.mkdir(new_feed_dir)
        log.debug(
            chlogger, {
                "name": __name__,
                "method": "create",
                "path": ed_path,
                "feed": feed,
                "dir": new_feed_dir,
                "message": "created directory"
            })
        template_files = [
            "LICENSE", "Makefile", "README.md", "src/10_down.py",
            "src/20_unzp.py", "src/30_pars.py", "src/40_inse.py",
            "src/50_save.py", "src/60_dist.sh", "src/70_arch.py",
            "manifest.json"
        ]
        env = Environment(loader=PackageLoader('edl', 'templates'),
                          autoescape=select_autoescape(['py']))
        m = {
            'NAME': feed,
            'MAINTAINER': maintainer,
            'COMPANY': company,
            'EMAIL': email,
            'DATA_URL': url,
            'REPO_URL':
            "https://github.com/energy-analytics-project/%s" % feed,
            'START': start_date,
            'DELAY': delay
        }
        for tf in template_files:
            template = env.get_template(tf)
            target = os.path.join(new_feed_dir, tf)
            path = os.path.dirname(target)
            if not os.path.exists(path):
                os.makedirs(path)
            with open(target, 'w') as f:
                f.write(template.render(m))
                log.debug(
                    chlogger, {
                        "name": __name__,
                        "method": "create",
                        "path": ed_path,
                        "feed": feed,
                        "target": target,
                        "message": "rendered target"
                    })

        hidden_files = ['gitignore']
        for hf in hidden_files:
            template = env.get_template(hf)
            target = os.path.join(new_feed_dir, ".%s" % hf)
            with open(target, 'w') as f:
                f.write(template.render(m))
                log.debug(
                    chlogger, {
                        "name": __name__,
                        "method": "create",
                        "path": ed_path,
                        "feed": feed,
                        "target": target,
                        "message": "rendered target"
                    })
        for src_file in os.listdir(os.path.join(new_feed_dir, 'src')):
            fp = os.path.join(new_feed_dir, 'src', src_file)
            f = Path(fp)
            f.chmod(f.stat().st_mode | stat.S_IEXEC)
            log.debug(
                chlogger, {
                    "name": __name__,
                    "method": "create",
                    "path": ed_path,
                    "feed": feed,
                    "file": fp,
                    "message": "chmod +x"
                })

        for d in DIRS:
            os.makedirs(os.path.join(new_feed_dir, d))
        return feed
    except Exception as e:
        tb = traceback.format_exc()
        log.critical(
            chlogger, {
                "name": __name__,
                "method": "create",
                "path": ed_path,
                "feed": feed,
                "ERROR": "FAILED to create feed",
                "exception": str(e),
                "trace": str(tb),
            })
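
# Usage sketch with hypothetical values, matching the signature above:
#     create(logging.getLogger("edl"), "/path/to/ed", "my-new-feed",
#            "Jane Doe", "ACME", "jane@example.com",
#            "https://example.com/data", [2019, 9, 1], 0)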