def run():
    """Run the sub-command selected on the command line and return its result.

    Recognised command lines (everything after the program name):

    * ``stats`` -- ``compute_stats_for_tokenized`` over the tokenized input.
    * ``initial_buckets`` -- ``bucketize`` over the tokenized input.
    * ``initial_refined_buckets`` -- ``initial_refined_buckets`` over the
      tokenized input.
    * ``buckets`` -- ``make_buckets`` over the tokenized input.
    * ``annotate_lines CLASSIFY_FIELD RESULT_FIELD [GROUP_FIELD]`` -- calls
      ``annotate_lines`` on the loaded data, once per group when GROUP_FIELD
      is given, otherwise once on all of it.

    The three bucket commands pass their result through
    ``with_packed_patterns`` and ``to_jsonisable``.

    Returns:
        The selected command's result; for ``annotate_lines`` the object
        returned by ``load_data()``. ``None`` when the arguments match none
        of the commands above.
    """
    argc = len(sys.argv)
    command = sys.argv[1] if argc > 1 else None

    if argc == 2 and command == "stats":
        return compute_stats_for_tokenized(load_tokenized_strings())
    elif argc == 2 and command == "initial_buckets":
        return to_jsonisable(
            with_packed_patterns(bucketize(load_tokenized_strings())))
    elif argc == 2 and command == "initial_refined_buckets":
        return to_jsonisable(
            with_packed_patterns(
                initial_refined_buckets(load_tokenized_strings())))
    elif argc == 2 and command == "buckets":
        return to_jsonisable(
            with_packed_patterns(make_buckets(load_tokenized_strings())))
    elif argc in (4, 5) and command == "annotate_lines":
        classify_field = sys.argv[2]
        result_field = sys.argv[3]
        group_field = sys.argv[4] if argc == 5 else None

        data = load_data()
        # The previous form, {None: data if ... else grouped_data(...)}, put
        # the conditional *inside* the dict value, so with a group field the
        # whole grouping was handed to annotate_lines as if it were a single
        # group. Choose between the two mappings instead.
        # NOTE(review): assumes grouped_data returns a mapping of
        # group id -> records of that group -- confirm against its definition.
        if group_field is None:
            data_groups = {None: data}
        else:
            data_groups = grouped_data(data, group_field)

        for group_data in data_groups.values():
            annotate_lines(group_data,
                           classify_field=classify_field,
                           result_field=result_field)
        # NOTE(review): presumably annotate_lines updates the records in
        # place, since its return value is ignored -- confirm.
        return data
def run():
    """Run the sub-command selected on the command line and return its result.

    Commands taking no further argument (``prog COMMAND``): ``stats``,
    ``run_columns``, ``aggregate_runs``, ``all_column_names``,
    ``column_value_run_lengths``, ``value_relations``, ``column_relations``,
    ``column_relations_graph``, ``column_equivalence_graph``,
    ``column_relations_digraph``, ``column_relations_digraph_pruned``,
    ``column_families``, ``auto_aggregation_groups`` and ``auto_aggregate``.

    Commands taking one JSON-encoded argument (``prog COMMAND JSON``):
    ``auto_aggregate_by_groups`` and ``group_runs_by``.

    Returns:
        Whatever the selected command's function returns, or ``None`` when
        the command name or the number of arguments matches nothing above.

    Raises:
        json.JSONDecodeError: The argument of a one-argument command is not
            valid JSON.
    """
    # Every entry is a lambda so that nothing is looked up or computed until
    # its command has actually been selected, exactly like the branches of an
    # if/elif chain.
    no_argument_commands = {
        "stats": lambda: compute_stats(),
        "run_columns": lambda: compute_run_columns(),
        "aggregate_runs": lambda: aggregate_runs(),
        "all_column_names": lambda: to_jsonisable(compute_all_column_names()),
        "column_value_run_lengths": lambda: to_jsonisable(
            compute_median_column_value_run_lengths(
                compute_all_column_names())),
        "value_relations": lambda: compute_value_relations(),
        "column_relations": lambda: compute_column_relations(),
        "column_relations_graph": lambda: column_relations_graph(),
        "column_equivalence_graph": lambda: column_equivalence_graph(
            compute_column_relations()),
        "column_relations_digraph": lambda: to_jsonisable(
            column_relations_digraph()),
        "column_relations_digraph_pruned": lambda: to_jsonisable(
            column_relations_digraph_pruned()),
        "column_families": lambda: to_jsonisable(
            compute_column_families(compute_all_column_names())),
        "auto_aggregation_groups": lambda: auto_aggregation_groups(),
        "auto_aggregate": lambda: auto_aggregate(),
    }
    json_argument_commands = {
        "auto_aggregate_by_groups":
            lambda spec: auto_aggregate_by_groups(spec),
        "group_runs_by": lambda spec: compute_group_runs_by(spec),
    }

    argc = len(sys.argv)
    if argc == 2:
        handler = no_argument_commands.get(sys.argv[1])
        return handler() if handler is not None else None
    if argc == 3:
        handler = json_argument_commands.get(sys.argv[1])
        if handler is None:
            # Unknown command: the argument is deliberately left unparsed.
            return None
        return handler(json.loads(sys.argv[2]))
    return None
def child_by_path(value, path: Tuple[Hashable, ...]) -> Optional[Hashable]:
    """Follow ``path`` down into ``value`` and return what it leads to.

    The elements of ``path`` are applied one after another: as a key when the
    current value is a ``dict``, otherwise as a position when the element is
    an ``int``.

    Args:
        value: The object to descend into.
        path: Dict keys and/or integer positions, outermost first.

    Returns:
        The object found at the end of ``path`` (``value`` itself for an
        empty path), or ``...`` (``Ellipsis``) when the path is not
        applicable to ``value``, i.e. when, before the path is exhausted,

        * the current value is ``None``,
        * the current value is a dict that lacks the key,
        * a non-integer key meets a value that is not a dict, or
        * an integer key meets a value that has no length.

        NOTE(review): the declared return type is narrower than what is
        actually returned (any nested object, or ``Ellipsis``).

    Raises:
        ValueError: An integer position is outside
            ``0 <= position < len(current value)``.
    """
    for key in path:
        if value is None:
            return ...

        if isinstance(value, dict):
            if key not in value:
                return ...
            value = value[key]
            continue

        if not isinstance(key, int):
            # A non-integer key cannot address anything inside a non-dict.
            # This used to be skipped silently, which returned the parent
            # value as though the path had matched.
            return ...

        try:
            size = len(value)
        except TypeError:
            # A value without a length (e.g. a number) has no children.
            return ...

        if not 0 <= key < size:
            # The details go into the exception rather than being printed to
            # stdout, which this module's script mode uses for JSON output.
            raise ValueError(
                "index {!r} is out of range for {!r} (path {!r})".format(
                    key, value, path))
        value = value[key]

    return value


if __name__ == "__main__":
    # Script mode: read one JSON document from stdin and write the JSON form
    # of Discovery().object_descriptor(...) for it to stdout.
    import sys
    import json
    from datatools.json.util import to_jsonisable

    json.dump(
        to_jsonisable(Discovery().object_descriptor(json.load(sys.stdin))),
        sys.stdout)
# NOTE(review): the three branches below are the tail of an if/elif command
# dispatch chain whose opening `if` is not on this line; they are shown at
# function-body indentation and left untouched.
    elif len(sys.argv) == 2 and sys.argv[1] == "clusters":
        return compute_clusters()
    elif len(sys.argv) == 2 and sys.argv[1] == "clusters2":
        return compute_clusters2()
    else:
        return None


# NOTE(review): run_once is defined elsewhere; presumably it makes the
# function body execute only once and reuses the result -- confirm.
@run_once
def load_tokenized_strings():
    """Return one list of tokens per input line.

    Every line returned by ``load_lines()`` is passed to ``tokenize`` and the
    tokens it yields are collected into a list.
    """
    return [[token for token in tokenize(s)] for s in load_lines()]


@run_once
def load_lines():
    """Read standard input to the end and return its lines as a list.

    Trailing newline characters are stripped from every line. A ``debug``
    message is emitted before and after reading.
    """
    debug("Loading data")
    lines = [line.rstrip('\n') for line in sys.stdin]
    debug("done")
    return lines


if __name__ == "__main__":
    # Script mode: run the selected command and write its result to stdout
    # as JSON. Nothing is written when the command returns None.
    output = run()
    if output is not None:
        if isinstance(output, GeneratorType):
            # A generator result is written item by item, one JSON document
            # per line.
            for o in output:
                json.dump(to_jsonisable(o), sys.stdout)
                print()
        else:
            json.dump(to_jsonisable(output), sys.stdout)