Example #1
def print_spaces(state_dir):
    db_path = os.path.join(state_dir, "db.sqlite3")
    j = dep.open_job_db(db_path)
    current_space = j.get_current_space()
    for space in j.get_spaces():
        selected = "*" if current_space == space else " "
        print("{} {}".format(selected, space))
Example #2
def gc(state_dir):
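    """Remove job directories under state_dir that no recorded execution
    references, then garbage-collect the job database."""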
    db_path = os.path.join(state_dir, "db.sqlite3")

    if not os.path.exists(state_dir) or not os.path.exists(db_path):
        log.warning(
            "Nothing to do (No such directory: {} or missing db.sqlite3 file)".
            format(state_dir))
        return

    j = dep.open_job_db(db_path)

    all_job_dirs = [
        os.path.join(state_dir, fn) for fn in os.listdir(state_dir)
        if re.match("r[0-9]+", fn)
    ]
    job_dirs_in_use = set(
        [e.job_dir for e in j.get_all_executions() if e.job_dir is not None])

    # make sure the jobdirs are a subset of all the job dirs we've found
    # print("job in use", list(job_dirs_in_use)[:10])
    # print("all_job_dirs", list(all_job_dirs)[:10])
    assert job_dirs_in_use.issubset(all_job_dirs)

    for job_dir in all_job_dirs:
        if job_dir in job_dirs_in_use:
            continue

        # one more final check because we're about to blow away a directory
        assert job_dir.startswith(state_dir)
        log.warning("Removing unused directory: %s", job_dir)
        shutil.rmtree(job_dir)

    j.gc()
Example #3
def test_limit_to_rule(tmpdir):
    jobdb = str(tmpdir.join("db"))

    j = dep.open_job_db(jobdb)

    def is_template1(inputs, transform):
        return transform == "template1"

    j.limitStartToTemplates([is_template1])

    # two templates which don't require any inputs.
    template1 = dep.Template([], [], "template1")
    template2 = dep.Template([], [], "template2")
    # one template which requires an object.
    template3 = dep.Template([dep.ForEach("contexts", dict(type="a"))], [],
                             "template3")

    j.add_template(template1)
    j.add_template(template2)
    j.add_template(template3)

    # After adding those templates, we should have only created an execution for template1
    assert len(j.get_pending()) == 1

    # however if we add an object, template3 can also execute
    j.add_obj("public", 1, dict(type="a"))
    assert len(j.get_pending()) == 2
Example #4
def test_completion(tmpdir):
    jobdb = str(tmpdir.join("db"))

    templates = [
        dep.Template([dep.ForEach("contexts", dict(type="contexts"))], [],
                     "MakeContexts"),
        dep.Template([dep.ForEach("context", dict(type="context"))], [],
                     "PerContext")
    ]
    j = dep.open_job_db(jobdb)
    for t in templates:
        j.add_template(t)

    j.add_obj("public", 1, dict(type="contexts", name="a"))
    j.refresh_rules()
    pending = j.get_pending()
    assert len(pending) == 1

    rule_exec_id = pending[0].id

    execution_id = j.record_started(rule_exec_id)
    j.record_completed(
        execution_id, pending[0].id, dep.STATUS_COMPLETED,
        [dict(name="a", type="context"),
         dict(name="b", type="context")])

    j.refresh_rules()
    assert len(j.get_pending()) == 2
    for p in j.get_pending():
        assert p.transform == "PerContext"
Example #5
def test_input_changed(tmpdir):
    jobdb = str(tmpdir.join("db"))

    templates = [
        dep.Template([dep.ForEach("contexts", dict(type="contexts"))], [],
                     "MakeContexts")
    ]
    j = dep.open_job_db(jobdb)
    for t in templates:
        j.add_template(t)

    j.add_obj("public", 1, dict(type="contexts", name="a"))
    j.refresh_rules()
    pending = j.get_pending()
    assert len(pending) == 1
    exec_id = j.record_started(pending[0].id)
    j.record_completed(
        exec_id, pending[0].id, dep.STATUS_COMPLETED,
        [dict(name="a", type="context"),
         dict(name="b", type="context")])

    j.refresh_rules()
    pending = j.get_pending()
    assert len(pending) == 0

    j.add_obj("public", 2, dict(type="contexts", name="a"))
    j.refresh_rules()
    assert len(j.get_pending()) == 1
Example #6
def test_overwrite_obj(tmpdir):
    jobdb = str(tmpdir.join("db"))

    j = dep.open_job_db(jobdb)
    j.add_template(
        dep.Template([dep.ForEach("in", {"A": "a"})], [], "transform"))

    # add the object, which results in a rule execution
    id1 = j.add_obj("public", 1, {"A": "a", "mut": {"$value": "1"}})
    objs = j.find_objs("public", {"A": "a"})
    assert len(objs) == 1
    j.refresh_rules()
    assert len(j.get_pending()) == 1
    assert len(j.get_all_executions()) == 0
    rule_exec_id1 = j.get_pending()[0].id

    # add object with same "key" which should result in the old rule execution being replaced
    id2 = j.add_obj("public", 2, {"A": "a", "mut": {"$value": "2"}})
    objs = j.find_objs("public", {"A": "a"})
    assert len(objs) == 1
    j.refresh_rules()
    assert len(j.get_pending()) == 1
    # there should still only be a single rule, but it should be a new rule with the new input
    rule_exec_id2 = j.get_pending()[0].id
    assert id1 != id2
    assert rule_exec_id1 != rule_exec_id2

    # now, try with a different key to make sure it's not that all objects overwrite one another
    id1 = j.add_obj("public", 1, {"B": "b", "mut": {"$filename": "1"}})
    objs = j.find_objs("public", {"B": "b"})
    assert len(objs) == 1
    id2 = j.add_obj("public", 2, {"B": "b", "mut": {"$filename": "2"}})
    objs = j.find_objs("public", {"B": "b"})
    assert len(objs) == 1
    assert id1 != id2
Example #7
def downstream_cmd(state_dir, space, predicates):
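    """For each artifact matching the predicates, print every artifact reachable
    downstream of it along with the transforms that consumed each one."""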

    j = dep.open_job_db(os.path.join(state_dir, "db.sqlite3"))
    if space is None:
        space = j.get_current_space()

    from collections import defaultdict

    rules_by_obj_id = defaultdict(lambda: set())

    all_rules = j.get_all_executions()
    for rule in all_rules:
        if rule.status == "canceled":
            continue

        for name, value in rule.inputs:
            if not isinstance(value, tuple):
                value = [value]
            for v in value:
                rules_by_obj_id[v.id].add(rule.transform)

    # print(rules_by_obj_id)

    subset = j.find_objs(space, dict(predicates))
    for o in subset:
        print(f"artifact {o} has the following downstream:")
        downstreams = j.find_all_reachable_downstream_objs([o.id])
        for downstream in downstreams:
            rules = rules_by_obj_id[downstream.id]
            downstream_id = downstream.id
            print(f"  {downstream_id}: rules {rules}")
        print("")
Example #8
def _run_with_config(tmpdir, counters, config_str):
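    """Write config_str to a .conseq file under tmpdir, run it through
    depexec.main, and return the resulting job db with the counters copied
    onto it so tests can assert on them."""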
    state_dir = str(tmpdir.join("state"))
    depfile = str(
        tmpdir.join(
            str(md5(config_str.encode('utf-8')).hexdigest()) + ".conseq"))
    print("writing", depfile)
    with open(depfile, "wt") as fd:
        fd.write(config_str)

    depexec.main(depfile,
                 state_dir,
                 forced_targets=[],
                 override_vars={},
                 max_concurrent_executions=1,
                 capture_output=False,
                 req_confirm=False,
                 config_file=None,
                 remove_unknown_artifacts=True)

    db_path = os.path.join(state_dir, "db.sqlite3")
    j = dep.open_job_db(db_path)
    # copy counters onto j for testing
    for k, v in counters.items():
        setattr(j, k, v)
    return j
Example #9
def test_obj_reconcile(tmpdir):
    db_path = os.path.join(str(tmpdir), "db.sqlite3")

    # depexec.main(filename, state_dir, targets, {}, 10, False, False, None)
    j = dep.open_job_db(db_path)

    # verify empty
    objs = j.find_objs(dep.DEFAULT_SPACE, {})
    assert len(objs) == 0

    jinja2_env = create_jinja2_env()

    vars = {}
    objs = [{"type": "a"}, {"type": "b"}]
    reconcile_db(j, jinja2_env, {}, objs, vars, force=True)

    # verify two objects were created
    objs = j.find_objs(dep.DEFAULT_SPACE, {})
    assert len(objs) == 2

    objs = [{"type": "a"}, {"type": "b"}, {"type": "c"}]
    reconcile_db(j, jinja2_env, {}, objs, vars, force=True)

    # type=c is the new object, getting us to 3
    objs = j.find_objs(dep.DEFAULT_SPACE, {})
    assert len(objs) == 3

    # now if we drop type=a, we should be back down to two objects
    objs = [{"type": "b"}, {"type": "c"}]
    reconcile_db(j, jinja2_env, {}, objs, vars, force=True)

    # verify we're back down to two objects
    objs = j.find_objs(dep.DEFAULT_SPACE, {})
    assert len(objs) == 2
Example #10
def print_history(state_dir):
    j = dep.open_job_db(os.path.join(state_dir, "db.sqlite3"))
    for exec_ in j.get_all_executions():

        lines = []
        lines.append("  inputs:")
        for name, value in exec_.inputs:
            if isinstance(value, dep.Obj):
                value = [value]
            lines.append("    {}:".format(name))
            for _value in value:
                for k, v in _value.props.items():
                    lines.append("      {}: {}".format(k, v))

        if len(exec_.outputs) > 0:
            lines.append("  outputs:")
            for value in exec_.outputs:
                for k, v in value.props.items():
                    lines.append("    {}: {}".format(k, v))

        print("rule {}: (execution id: {}, status: {})".format(
            exec_.transform, exec_.id, exec_.status))
        for line in lines:
            print(line)

        print("")
Example #11
def rm_cmd(state_dir, dry_run, space, query):
    j = dep.open_job_db(os.path.join(state_dir, "db.sqlite3"))
    if space is None:
        space = j.get_current_space()

    root_objs = j.find_objs(space, query)
    root_obj_ids = [o.id for o in root_objs]

    remove_obj_and_children(j, root_obj_ids, dry_run)
Example #12
def forget_cmd(state_dir, rule_name, is_pattern):
    j = dep.open_job_db(os.path.join(state_dir, "db.sqlite3"))

    if is_pattern:
        pattern = re.compile(rule_name)
        transforms = [
            x.transform for x in j.get_all_executions()
            if pattern.match(x.transform)
        ]
    else:
        transforms = [rule_name]

    for transform in transforms:
        j.invalidate_rule_execution(transform)
Example #13
def localize_cmd(state_dir, space, predicates, depfile, config_file):
    rules = read_rules(state_dir, depfile, config_file)

    resolver = xref.Resolver(state_dir, rules.vars)

    j = dep.open_job_db(os.path.join(state_dir, "db.sqlite3"))
    if space is None:
        space = j.get_current_space()
    subset = j.find_objs(space, dict(predicates))
    for obj in subset:
        for k, v in obj.props.items():
            if isinstance(v, dict) and "$file_url" in v:
                url = v["$file_url"]
                r = resolver.resolve(url)
                log.info("resolved %s to %s", url, r)
Example #14
def main(state_dir):
    db_path = os.path.join(state_dir, "db.sqlite3")
    j = dep.open_job_db(db_path)
    instances = j.find_objs({})
    instances = [x.props for x in instances]
    store = depquery.AugmentedStore(instances)

    app = flask.Flask(__name__)
    app.store = store
    api = Api(app)
    api.add_resource(GetInstances, "/api/instances")
    api.add_resource(FindProps, "/api/props")
    api.add_resource(FindPropValues, "/api/values")

    app.run(debug=True)
Example #15
def test_foreach(tmpdir):
    jobdb = str(tmpdir.join("db"))

    templates = [
        dep.Template([dep.ForEach("contexts", dict(type="contexts"))], [],
                     "MakeContexts")
    ]
    j = dep.open_job_db(jobdb)
    for t in templates:
        j.add_template(t)

    j.add_obj("public", 1, dict(type="contexts", name="a"))
    j.refresh_rules()
    assert len(j.get_pending()) == 1
    j.add_obj("public", 1, dict(type="contexts", name="b"))
    j.refresh_rules()
    assert len(j.get_pending()) == 2
Example #16
def test_stuff(tmpdir):
    jobdb = str(tmpdir.join("db"))
    templates = [
        dep.Template([dep.ForEach("contexts", dict(type="contexts"))], [],
                     "MakeContexts"),
        dep.Template([
            dep.ForEach("avana_lib",
                        dict(type="crispr_dataset", library="Avana")),
            dep.ForAll("gecko_libs", dict(library="Gecko"))
        ], [], "AvanaGeckoMerge"),
        dep.Template([
            dep.ForEach("dataset", dict(type="crispr_dataset")),
            dep.ForEach("context", dict(type="context"))
        ], [], "CalculateEnrichment"),
        dep.Template([
            dep.ForEach("dataset", dict(type="crispr_dataset")),
            dep.ForEach("parameters", dict(type="atlantis_params"))
        ], [], "RunAtlantis")
    ]

    j = dep.open_job_db(jobdb)
    for t in templates:
        j.add_template(t)

    def execute(execution_id, transform, inputs):
        if transform == "MakeContexts":
            j.record_completed(2, execution_id, dep.STATUS_COMPLETED, [
                dict(name="a", type="context"),
                dict(name="b", type="context")
            ])

    j.add_obj("public", 1, dict(type="contexts"))
    j.add_obj("public", 1, dict(type="atlantis_params", parameters="p1"))
    j.add_obj("public", 1, dict(type="atlantis_params", parameters="p2"))
    j.add_obj("public", 1, dict(type="crispr_dataset", library="Avana"))
    j.add_obj("public", 1, dict(type="crispr_dataset", library="Gecko"))

    j.refresh_rules()
    for pending in j.get_pending():
        execute(pending.id, pending.transform, pending.inputs)

    print(j.to_dot(True))
Example #17
def debugrun(state_dir, depfile, target, override_vars, config_file):
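    """Report how many artifacts match each individual input query of the
    target rule, and how many match the rule as a whole, to help debug why a
    rule does or does not fire."""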
    db_path = os.path.join(state_dir, "db.sqlite3")
    print("opening", db_path)
    j = dep.open_job_db(db_path)

    rules = read_rules(state_dir, depfile, config_file)

    for var, value in override_vars.items():
        rules.set_var(var, value)

    rule = rules.get_rule(target)
    queries, predicates = convert_input_spec_to_queries(
        rules.jinja2_env, rule, rules.vars)
    for q in queries:
        t = dep.Template([q], [], rule.name)
        applications = j.query_template(t)
        log.info("{} matches for {}".format(len(applications), q))

    applications = j.query_template(
        dep.Template(queries, predicates, rule.name))
    log.info("{} matches for entire rule".format(len(applications)))
Example #18
def generate_report_cmd(state_dir, dest_dir):
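    """Write a static HTML report into dest_dir: one page per artifact, one page
    per execution (listing its files and disk usage), and an index grouping
    artifacts by type and executions by rule."""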
    from .template import create_template_jinja2_env
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)

    jinja2_env = create_template_jinja2_env()

    def all_objs_props(objs, exclude=[]):
        props = set()
        for obj in objs:
            props.update(obj.props.keys())
        props = props.difference(exclude)
        return sorted(props)

    def prop_summary(obj):
        result = []
        for name, value in obj.props.items():
            if isinstance(value, dict) and "$value" in value:
                value = value["$value"]
            value = str(value)
            if len(value) > 40:
                value = value[:5] + "..." + value[-20:]
            result.append((name, value))
        return sorted(result)

    import jinja2

    def value_cell(value):
        if isinstance(value, dict):
            if "$value" in value:
                return jinja2.Markup(
                    f"<td class='transient-value'>{jinja2.escape(value['$value'])}</td>"
                )
            elif "$filename" in value:
                return jinja2.Markup(
                    f"<td class='filename-value'>{jinja2.escape(value['$filename'])}</td>"
                )
            elif "$file_url" in value:
                return jinja2.Markup(
                    f"<td class='file-url-value'>{jinja2.escape(value['$file_url'])}</td>"
                )
        return jinja2.Markup(f"<td>{jinja2.escape(str(value))}</td>")

    jinja2_env.filters.update({
        "all_objs_props": all_objs_props,
        "prop_summary": prop_summary,
        "value_cell": value_cell,
        'is_tuple': lambda x: isinstance(x, tuple)
    })
    j = open_job_db(os.path.join(state_dir, "db.sqlite3"))

    objs = j.find_objs(DEFAULT_SPACE, {})
    executions = j.get_all_executions()
    execution_by_input = defaultdict(lambda: [])
    execution_by_output = {}
    for execution in executions:
        for name, input in execution.inputs:
            if not isinstance(input, tuple):
                inputs = [input]
            else:
                inputs = input
            for input in inputs:
                execution_by_input[input.id].append(execution)
        for output in execution.outputs:
            execution_by_output[output.id] = execution

    index_template = jinja2_env.get_template("index.html")

    obj_template = jinja2_env.get_template("artifact.html")

    execution_template = jinja2_env.get_template("execution.html")

    def write_artifact(obj: Obj):
        fn = f"{dest_dir}/obj_{obj.id}.html"
        with open(fn, "wt") as fd:
            execution = execution_by_output.get(obj.id)
            downstream_executions = execution_by_input[obj.id]
            fd.write(
                obj_template.render(
                    obj=obj,
                    execution=execution,
                    downstream_executions=downstream_executions))

    def get_disk_usage(job_dir):
        if job_dir is None:
            return 0

        size = 0
        for fn in os.listdir(job_dir):
            fn_path = os.path.join(job_dir, fn)
            size += os.path.getsize(fn_path)
        return size

    def write_execution(execution, disk_usage):
        fn = f"{dest_dir}/exec_{execution.id}.html"

        files = []
        if execution.job_dir:
            for output_fn in os.listdir(execution.job_dir):
                files.append((os.path.relpath(
                    os.path.join(execution.job_dir, output_fn),
                    dest_dir), output_fn))

        with open(fn, "wt") as fd:
            fd.write(
                execution_template.render(execution=execution,
                                          files=files,
                                          disk_usage=disk_usage))

    ExecSummary = namedtuple("ExecSummary", "execs disk_usage")
    execs_by_name = defaultdict(lambda: ExecSummary([], 0))
    objs_by_type = defaultdict(lambda: [])
    # write a file per object
    for obj in objs:
        write_artifact(obj)
        objs_by_type[obj.props.get("type", "")].append(obj)

    # write a file per execution
    for execution in executions:
        disk_usage = get_disk_usage(execution.job_dir)
        write_execution(execution, disk_usage)
        execs, total_disk_usage = execs_by_name[execution.transform]
        execs_by_name[execution.transform] = ExecSummary(
            execs + [execution], total_disk_usage + disk_usage)

    rules_with_size = [(name, summary.disk_usage)
                       for name, summary in execs_by_name.items()]
    rules_with_size.sort(key=lambda x: x[1], reverse=True)

    with open(f"{dest_dir}/index.html", "wt") as fd:
        sorted_objs_by_type = sorted(objs_by_type.items())
        sorted_execs_by_name = sorted(execs_by_name.items())
        fd.write(
            index_template.render(objs_by_type=sorted_objs_by_type,
                                  execs_by_name=sorted_execs_by_name,
                                  rules_with_size=rules_with_size))
Example #19
def ls_cmd(state_dir, space, predicates, groupby, columns):
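    """Print the artifacts matching the predicates as tables, factoring out
    properties shared by every row and optionally grouping rows by the
    groupby property."""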
    from tabulate import tabulate
    from conseq import depquery

    cache_db = xref.open_cache_db(state_dir)

    j = dep.open_job_db(os.path.join(state_dir, "db.sqlite3"))
    if space is None:
        space = j.get_current_space()
    subset = j.find_objs(space, dict(predicates))
    subset = [o.props for o in subset]

    def print_table(subset, indent):
        if len(subset) > 1 and columns is None:
            counts = depquery.count_unique_values_per_property(subset)
            common_keys, variable_keys = depquery.split_props_by_counts(counts)
            common_table = [[subset[0][k] for k in common_keys]]
            if len(common_keys) > 0:
                print(
                    indent_str(
                        "Properties shared by all {} rows:".format(
                            len(subset)), indent))
                print(
                    indent_str(
                        tabulate(common_table, common_keys, tablefmt="simple"),
                        indent + 2,
                    ))

        elif columns is not None:
            variable_keys = columns
        else:
            # remaining case: columns == None and len(subset) == 1
            variable_keys = list(subset[0].keys())

        variable_table = []
        for row in subset:
            full_row = []
            for k in variable_keys:
                v = row.get(k)
                if isinstance(v, dict) and "$file_url" in v:
                    cache_rec = cache_db.get(v["$file_url"])
                    if cache_rec is not None:
                        v = {"$filename": cache_rec[0]}
                full_row.append(str(v))
            variable_table.append(full_row)
        print(
            indent_str(
                tabulate(variable_table, variable_keys, tablefmt="simple"),
                indent))

    if groupby is None:
        print_table(subset, 0)
    else:
        by_pred = collections.defaultdict(lambda: [])
        for row in subset:
            by_pred[row.get(groupby)].append(row)

        for group, rows in by_pred.items():
            print("For {}={}:".format(groupby, group))
            print_table(rows, 2)
            print()
Example #20
def dot_cmd(state_dir, detailed):
    j = dep.open_job_db(os.path.join(state_dir, "db.sqlite3"))
    print(j.to_dot(detailed))
Example #21
def list_cmd(state_dir):
    j = dep.open_job_db(os.path.join(state_dir, "db.sqlite3"))
    j.dump()
Example #22
def export_cmd(state_dir, depfile, config_file, dest_s3_path):
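    """Serialize all artifacts and completed executions as conseq
    "add-if-missing" and "remember-executed" statements, uploading local files
    to the CAS, and write the result to dest_s3_path (an s3:// URL or a local
    file)."""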
    out = StringIO()

    rules = read_rules(state_dir, depfile, config_file)
    j = dep.open_job_db(os.path.join(state_dir, "db.sqlite3"))

    objs = j.find_objs(DEFAULT_SPACE, {})
    print(len(objs))
    vars = rules.vars

    cas_remote = helper.Remote(
        vars["S3_STAGING_URL"],
        ".",
        helper.S3StorageConnection(vars["AWS_ACCESS_KEY_ID"],
                                   vars["AWS_SECRET_ACCESS_KEY"]),
    )

    def process_value(value):
        if isinstance(value, dict):
            if "$filename" in value:
                url = cas_remote.upload_to_cas(value["$filename"])
                value = {"$file_url": url}
        return value

    def process_filenames(obj: Obj):
        translated = {}
        for key, value in obj.props.items():
            if isinstance(value, (list, tuple)):
                value = [process_value(x) for x in value]
            else:
                value = process_value(value)
            translated[key] = value

        if "$manually-added" not in translated:
            translated["$manually-added"] = {"$value": "false"}

        return translated

    def reindent(s, ident):
        indent_str = " " * ident
        lines = s.split("\n")

        return "\n".join([lines[0]] + [indent_str + x for x in lines[1:]])

    for obj in objs:
        try:
            props = process_filenames(obj)
        except Exception as e:
            raise Exception(
                "Could not process filenames in artifact: {}".format(
                    repr(obj))) from e
        out.write("add-if-missing {}\n\n".format(reindent(
            json.dumps(props), 3)))

    def get_key_props(obj):
        props = {}
        for key, value in obj.props.items():
            if isinstance(value, dict) and (("$filename" in value) or
                                            ("$file_url" in value) or
                                            ("$value" in value)):
                continue
            props[key] = value
        return props

    def value_as_json(value):
        if isinstance(value, tuple):
            return json.dumps([get_key_props(x) for x in value], indent=3)
        else:
            return json.dumps(get_key_props(value), indent=3)

    executions = j.get_all_executions()
    skipped = 0
    for execution in executions:
        if execution.status != "completed":
            skipped += 1
            continue

        out.write('remember-executed transform : "{}"\n'.format(
            execution.transform))
        for input in execution.inputs:
            out.write('   input "{}" : {}\n'.format(
                input[0], reindent(value_as_json(input[1]), 3)))
        for output in execution.outputs:
            out.write("   output : {}\n".format(
                reindent(value_as_json(output), 3)))
        out.write("\n")

    log.info(
        "Skipping export of %d executions which did not complete successfully",
        skipped)
    if dest_s3_path.startswith("s3://"):
        log.info("Uploading artifact metadata to %s", dest_s3_path)
        cas_remote.upload_str(dest_s3_path, out.getvalue())
    else:
        log.info("Writing artifacts to %s", dest_s3_path)
        with open(dest_s3_path, "wt") as fd:
            fd.write(out.getvalue())
Example #23
def main(depfile: str,
         state_dir: str,
         forced_targets: List[Any],
         override_vars: Dict[Any, Any],
         max_concurrent_executions: int,
         capture_output: bool,
         req_confirm: bool,
         config_file: str,
         maxfail: int = 1,
         maxstart: Optional[int] = None,
         force_no_targets: bool = False,
         reattach_existing=None,
         remove_unknown_artifacts=None,
         properties_to_add=[],
         rule_filter=None) -> int:
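    """Top-level entry point: open the job db under state_dir, reconcile rules
    and artifacts from depfile, reattach or cancel executions left over from a
    previous invocation, then run the main execution loop."""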
    assert max_concurrent_executions > 0

    if not os.path.exists(state_dir):
        os.makedirs(state_dir)

    db_path = os.path.join(state_dir, "db.sqlite3")
    j = dep.open_job_db(db_path)

    # handle case where we explicitly state some templates to execute.  Make sure nothing else executes
    if len(forced_targets) > 0 or force_no_targets:
        forced_rule_names = force_execution_of_rules(j, forced_targets)
    else:
        forced_rule_names = []

    if rule_filter:
        assert len(forced_targets) == 0, \
            "Cannot specify allowed rules and forced rules"
        # because force_execution_of_rules() calls limitStartToTemplates
        # and one will clobber the state of the other
        j.limitStartToTemplates([rule_filter])

    rules = read_rules(state_dir, depfile, config_file, initial_config={})
    rule_specifications = rules.get_rule_specifications()
    jinja2_env = rules.jinja2_env

    if rules.get_client("default", must=False) is None:
        rules.add_client("default", exec_client.LocalExecClient({}))
    # override with max_concurrent_executions
    rules.get_client("default").resources["slots"] = max_concurrent_executions

    for var, value in override_vars.items():
        rules.set_var(var, value)

    # handle the "add-if-missing" objects and changes to rules
    reconcile_db(j,
                 jinja2_env,
                 rule_specifications,
                 rules.objs,
                 rules.vars,
                 force=remove_unknown_artifacts)

    # handle the remember-executed statements
    with j.transaction():
        for exec_ in rules.remember_executed:
            j.remember_executed(exec_)

    # finish initializing exec clients
    for name, props in list(rules.exec_clients.items()):
        if isinstance(props, dict):
            config = rules.get_vars()
            props = expand_dict(jinja2_env, props, config)

            class VirtualDict():
                def __getitem__(self, key):
                    value = rules.get_vars()[key]
                    return render_template(jinja2_env, value, config)

                def get(self, key, default=None):
                    value = rules.get_vars().get(key, default)
                    if value is None:
                        return None
                    return render_template(jinja2_env, value, config)

            client = exec_client.create_client(name, VirtualDict(), props)
            rules.add_client(name, client)

    # Reattach or cancel jobs from previous invocation
    executing = []
    pending_jobs = j.get_started_executions()
    if len(pending_jobs) > 0:
        log.warning(
            "Reattaching jobs that were started in a previous invocation of conseq, but had not terminated before conseq exited: %s",
            pending_jobs)

        if reattach_existing is None:
            reattach_existing = ui.user_wants_reattach()

        if reattach_existing:
            executing = reattach(j, rules, pending_jobs)
        else:
            pending_jobs = j.get_started_executions()
            for e in pending_jobs:
                log.warning(
                    "Canceling {} which was started from earlier execution".
                    format(e.id))
                j.cancel_execution(e.id)

    # any jobs killed or other failures need to be removed so we'll attempt to re-run them
    j.cleanup_unsuccessful()

    assert len(j.get_pending()) == 0

    for dec in rules:
        try:
            j.add_template(to_template(jinja2_env, dec, rules.vars))
        except MissingTemplateVar as ex:
            log.error("Could not load rule {}: {}".format(
                dec.name, ex.get_error()))
            return -1

    # now check the rules we requested exist
    for rule_name in forced_rule_names:
        if not j.has_template(rule_name):
            raise Exception("No such rule: {}".format(rule_name))

    def new_object_listener(obj):
        timestamp = datetime.datetime.now().isoformat()
        j.add_obj(timestamp, obj)

    try:
        ret = main_loop(jinja2_env,
                        j,
                        new_object_listener,
                        rules,
                        state_dir,
                        executing,
                        capture_output,
                        req_confirm,
                        maxfail,
                        maxstart,
                        properties_to_add=properties_to_add)
    except FatalUserError as e:
        print("Error: {}".format(e))
        return -1

    return ret
Example #24
def select_space(state_dir, name, create_if_missing):
    db_path = os.path.join(state_dir, "db.sqlite3")
    j = dep.open_job_db(db_path)
    j.select_space(name, create_if_missing)
Example #25
def lsexec(state_dir):
    j = dep.open_job_db(os.path.join(state_dir, "db.sqlite3"))
    executions = j.get_all_executions()
    for execution in executions:
        _print_execution(execution)
Example #26
def test_rule_reconcile(tmpdir, monkeypatch):
    counters = {}

    def increment_counter_callback(field):
        def fn(*args, **kwargs):
            counters[field] += 1

        return fn

    # record the number of times these functions have been called
    monkeypatch.setattr(dep, 'signal_add_obj',
                        increment_counter_callback('new_artifacts'))
    monkeypatch.setattr(dep, 'signal_remove_obj',
                        increment_counter_callback('del_artifacts'))

    def run_with_config(config_str):
        counters.update({'new_artifacts': 0, 'del_artifacts': 0})
        return _run_with_config(tmpdir, counters, config_str)

    db_path = os.path.join(str(tmpdir), "db.sqlite3")

    # depexec.main(filename, state_dir, targets, {}, 10, False, False, None)
    j = dep.open_job_db(db_path)

    # create a few artifacts
    j = run_with_config("""
        rule a:
            outputs: {"type": "a-out"}    
        rule b:
            outputs: {"type": "b-out"}    
        rule c1:
            outputs: {"type": "c1-out"}    
    """)

    assert j.new_artifacts == 3
    assert j.del_artifacts == 0

    # now run again, but with an additional rule. Shouldn't delete anything, just add one more artifact
    j = run_with_config("""
        rule a:
            outputs: {"type": "a-out"}    
        rule b:
            outputs: {"type": "b-out"}    
        rule c1:
            outputs: {"type": "c1-out"}    
        rule c2:
            inputs: in={"type": "c1-out"}
            outputs: {"type": "c2-out"}    
    """)

    assert j.new_artifacts == 1
    assert j.del_artifacts == 0

    # but if we change a rule, we should delete that artifact and create a new one
    j = run_with_config("""
        rule a:
            outputs: {"type": "a-out"}    
        rule b:
            outputs: {"type": "b-out-2"}   
        rule c1:
            outputs: {"type": "c1-out"}    
        rule c2:
            inputs: in={"type": "c1-out"}
            outputs: {"type": "c2-out"}    
    """)
    assert j.new_artifacts == 1
    assert j.del_artifacts == 1

    # Lastly, changing a rule should delete downstream artifacts too
    j = run_with_config("""
        rule a:
            outputs: {"type": "a-out"}    
        rule b:
            outputs: {"type": "b-out-2"}   
        rule c1:
            outputs: {"type": "c1-out-2"}    
        rule c2:
            inputs: in={"type": "c1-out"}
            outputs: {"type": "c2-out"}    
    """)
    assert j.new_artifacts == 1
    assert j.del_artifacts == 2