def run():
    """Run the command selected by ``sys.argv`` and return its result.

    Recognised invocations (``sys.argv[1]`` is the command name):

    * ``stats``, ``initial_buckets``, ``initial_refined_buckets``,
      ``buckets`` -- exactly one argument; each operates on the result of
      ``load_tokenized_strings()``.
    * ``annotate_lines CLASSIFY_FIELD RESULT_FIELD [GROUP_FIELD]`` -- loads
      the data, calls ``annotate_lines`` on it and returns the loaded data.

    Returns ``None`` when the arguments match none of these forms.
    """
    argc = len(sys.argv)
    # Only look at argv[1] when it exists; a bare invocation matches nothing.
    command = sys.argv[1] if argc >= 2 else None

    if argc == 2:
        if command == "stats":
            return compute_stats_for_tokenized(load_tokenized_strings())
        if command == "initial_buckets":
            buckets = bucketize(load_tokenized_strings())
            return to_jsonisable(with_packed_patterns(buckets))
        if command == "initial_refined_buckets":
            buckets = initial_refined_buckets(load_tokenized_strings())
            return to_jsonisable(with_packed_patterns(buckets))
        if command == "buckets":
            buckets = make_buckets(load_tokenized_strings())
            return to_jsonisable(with_packed_patterns(buckets))

    if argc in (4, 5) and command == "annotate_lines":
        # The optional fourth argument names the field to group by.
        group_field = sys.argv[4] if argc == 5 else None
        data = load_data()

        # NOTE(review): when a group field is given, the whole result of
        # grouped_data() is handed to annotate_lines() in a single call rather
        # than one call per group -- confirm this is the intended behaviour.
        if group_field is None:
            lines_to_annotate = data
        else:
            lines_to_annotate = grouped_data(data, group_field)

        annotate_lines(lines_to_annotate,
                       classify_field=sys.argv[2],
                       result_field=sys.argv[3])
        return data

    return None
def run():
    """Run the command selected by ``sys.argv`` and return its result.

    ``sys.argv[1]`` names the command.  ``auto_aggregate_by_groups`` and
    ``group_runs_by`` take one extra argument, a JSON document that is decoded
    and passed to the handler; every other command takes no extra argument.

    Returns ``None`` when the arguments match no command handled here.
    """
    argc = len(sys.argv)
    if argc not in (2, 3):
        return None
    command = sys.argv[1]

    # Commands with one JSON-encoded parameter.
    if argc == 3:
        if command == "auto_aggregate_by_groups":
            return auto_aggregate_by_groups(json.loads(sys.argv[2]))
        if command == "group_runs_by":
            return compute_group_runs_by(json.loads(sys.argv[2]))
        return None

    # Commands without parameters.
    if command == "stats":
        return compute_stats()
    if command == "run_columns":
        return compute_run_columns()
    if command == "aggregate_runs":
        return aggregate_runs()
    if command == "all_column_names":
        return to_jsonisable(compute_all_column_names())
    if command == "column_value_run_lengths":
        column_names = compute_all_column_names()
        return to_jsonisable(
            compute_median_column_value_run_lengths(column_names))
    if command == "value_relations":
        return compute_value_relations()
    if command == "column_relations":
        return compute_column_relations()
    if command == "column_relations_graph":
        return column_relations_graph()
    if command == "column_equivalence_graph":
        return column_equivalence_graph(compute_column_relations())
    if command == "column_relations_digraph":
        return to_jsonisable(column_relations_digraph())
    if command == "column_relations_digraph_pruned":
        return to_jsonisable(column_relations_digraph_pruned())
    if command == "column_families":
        column_names = compute_all_column_names()
        return to_jsonisable(compute_column_families(column_names))
    if command == "auto_aggregation_groups":
        return auto_aggregation_groups()
    if command == "auto_aggregate":
        return auto_aggregate()
    return None
示例#3
0

def child_by_path(value, path: Tuple[Hashable, ...]) -> Optional[Hashable]:
    """Follow ``path`` down into nested dicts/sequences and return the result.

    Each element of ``path`` is applied in turn to the current value: as a
    key when the current value is a dict, otherwise as a non-negative index
    when the element is an int.

    Args:
        value: the root value to descend from.
        path: keys and/or indices to apply, outermost first.

    Returns:
        The value reached at the end of ``path`` (``value`` itself for an
        empty path), or ``...`` (Ellipsis) when the path is not applicable:
        the current value is ``None``, a dict lacks the key, or an index is
        outside ``0 <= index < len(current value)``.

    Raises:
        ValueError: a non-int key is applied to a value that is not a dict.
    """
    for key in path:
        if value is None:
            return ...
        if isinstance(value, dict):
            if key not in value:
                return ...
            value = value[key]
        elif isinstance(key, int):
            # An index outside the sequence means the path does not apply;
            # never keep descending from the unchanged parent value.
            if not 0 <= key < len(value):
                return ...
            value = value[key]
        else:
            # Diagnostics go into the exception rather than to stdout, which
            # this module uses for its JSON output.
            raise ValueError(
                "cannot apply key {!r} of path {!r} to value {!r}".format(
                    key, path, value))
    return value


if __name__ == "__main__":
    import json
    import sys

    from datatools.json.util import to_jsonisable

    # Read one JSON document from stdin, describe it with Discovery and write
    # the JSON-serialisable form of that descriptor to stdout.
    describe = Discovery().object_descriptor
    document = json.load(sys.stdin)
    descriptor = describe(document)
    json.dump(to_jsonisable(descriptor), sys.stdout)
示例#4
0
    elif len(sys.argv) == 2 and sys.argv[1] == "clusters":
        return compute_clusters()
    elif len(sys.argv) == 2 and sys.argv[1] == "clusters2":
        return compute_clusters2()
    else:
        return None


@run_once
def load_tokenized_strings():
    """Return one token list per input line, in input order.

    Every line from ``load_lines()`` is passed to ``tokenize`` and the tokens
    it yields are materialised into a list.
    """
    # NOTE(review): run_once is defined elsewhere; presumably it caches the
    # result so the input is tokenized only once -- confirm.
    return [list(tokenize(line)) for line in load_lines()]


@run_once
def load_lines():
    """Read all of stdin and return its lines without trailing newlines.

    Only ``'\\n'`` characters are stripped from the end of each line; other
    trailing whitespace is kept.  Progress is reported through ``debug``
    before and after reading.
    """
    debug("Loading data")
    stripped_lines = [raw_line.rstrip('\n') for raw_line in sys.stdin]
    debug("done")
    return stripped_lines


if __name__ == "__main__":
    output = run()
    if isinstance(output, GeneratorType):
        # A generator result is streamed: one JSON document per line.
        for o in output:
            json.dump(to_jsonisable(o), sys.stdout)
            print()
    elif output is not None:
        # Any other non-None result is written as a single JSON document,
        # without a trailing newline.
        json.dump(to_jsonisable(output), sys.stdout)