def read_file(*components, **kwargs):
    """
    Load and parse a JSON file.

    Arguments:
        *components (str): Path components to the file.
        **kwargs: Keyword options:
            must_exist (bool, optional): If False, return an empty dict
                if the file does not exist. Default: True.

    Returns:
        array or dict: Decoded JSON data.

    Raises:
        File404: If the path does not exist, and must_exist is True.
        ValueError: If the JSON is malformed.
        IOError: If the file exists but cannot be read, and must_exist
            is True.
    """
    must_exist = kwargs.get("must_exist", True)

    if must_exist:
        path = fs.must_exist(*components)
    else:
        path = fs.path(*components)

    try:
        with open(path) as infile:
            return loads(infile.read())
    except ValueError as e:
        raise ValueError(
            "malformed JSON file '{path}'. Message from parser: {err}".format(
                path=fs.basename(path), err=str(e)))
    except IOError as e:
        if not must_exist:
            # Best-effort mode: a missing or unreadable file is treated
            # as an empty data blob.
            return {}
        else:
            # BUG FIX: the original did `return e`, handing the exception
            # object back to the caller as if it were data. Re-raise it.
            raise
def main():
    """Load a model from a JSON spec and benchmark sampling with it."""
    import sys

    log.init(verbose=True)

    loaded_model = model.from_json(clgen.load_json_file(sys.argv[1]))
    loaded_sampler = sampler.from_json({
        "kernels": {
            "args": [
                "__global float*",
                "__global float*",
                "__global float*",
                "const int",
            ],
            "max_length": 5000,
            "temperature": 1,
        },
        "sampler": {
            "batch_size": 1000,
            "max_batches": 1,
            "static_checker": False,
            "dynamic_checker": False,
        },
    })

    print("Corpus size:", loaded_model.corpus.size)
    print("Vocab size: ", loaded_model.corpus.vocab_size)
    print()
    clgen.platform_info()
    print()

    outpath = "./benchmark-" + fs.basename(sys.argv[1])
    info = evaluate(loaded_model, loaded_sampler)
    clgen.write_file(outpath, clgen.format_json(info))
def _msg(i, x):
    """
    Format one stack-frame record as a numbered one-line summary.

    Arguments:
        i (int): Zero-based index of the frame.
        x: Frame record where x[0] is the file path, x[1] the line
           number, and x[2] the function name.

    Returns:
        str: A formatted line, e.g. " #1 foo.py:42            bar()".
    """
    n = i + 1
    filename = fs.basename(x[0])
    lineno = x[1]
    fnname = x[2]
    # BUG FIX: the original hard-coded "(unknown)" as the location and
    # never used the computed filename; include the filename instead.
    loc = "{filename}:{lineno}".format(**vars())
    return " #{n} {loc: <18} {fnname}()".format(**vars())
def from_bin(path: Path = "gslang", session: session_t = None) -> List['Testbed']:
    """
    Look up (or create) the Testbed record for a compiler binary.

    Arguments:
        path (Path): Path to the binary. Defaults to "gslang".
        session (session_t): Optional database session to reuse.

    Returns:
        List[Testbed]: A single-element list containing the testbed with
            optimizations enabled for this binary's platform.
    """
    import cldrive

    with ReuseSession(session) as sess:
        name = fs.basename(path)
        version_string = Testbed._get_version(path)
        platform_record = get_or_add(
            sess, Platform,
            platform=name,
            version=version_string,
            host=cldrive.host_os())
        # Flush so platform_record.id is assigned before it is referenced.
        sess.flush()
        testbed = get_or_add(
            sess, Testbed,
            platform_id=platform_record.id,
            optimizations=True)
        return [testbed]
def main():
    """Train a model, then sample kernels for the most common prototypes."""
    log.init(verbose=True)

    trained_model = model.from_json(clgen.load_json_file(sys.argv[1]))
    github_corpus = corpus.Corpus.from_json({"path": "~/data/github"})

    print("CLgen: ", clgen.version())
    print("Corpus size:", github_corpus.size)
    print("Vocab size: ", github_corpus.vocab_size)

    trained_model.train()

    prototypes, _ = corpus.most_common_prototypes(github_corpus, 20)
    for index, row in enumerate(prototypes):
        outpath = "./inference-p" + str(index + 1) + "-" + fs.basename(sys.argv[1])
        if fs.exists(outpath):
            print("skipped result for", outpath)
            continue
        print("starting result for", outpath)

        _, prototype = row
        # Drop the trailing identifier from each argument declaration,
        # keeping only the type specifiers.
        argspec = [' '.join(arg.split()[:-1]) for arg in prototype.split(',')]
        print("argspec", ','.join(str(arg) for arg in argspec))

        prototype_sampler = sampler.from_json({
            "kernels": {
                "args": argspec,
                "max_length": 5000,
            },
            "sampler": {
                "batch_size": 2000,
                "max_batches": 1,
                "static_checker": False,
                "dynamic_checker": False,
            },
        })
        info = evaluate(trained_model, prototype_sampler)
        clgen.write_file(outpath, clgen.format_json(info))
def load_data_desc(platform, source="B", max_seq_len=1000,
                   atomizer=CharacterAtomizer, quiet=False):
    """
    Load experimental results and encode kernel sources for a platform.

    Arguments:
        platform (str): Platform name, used to locate the runtime CSVs
            under "runtimes/".
        source (str, optional): Which data to load: "B" benchmarks,
            "S" synthetics, "BS" benchmarks + synthetics, "N" NPB
            benchmarks only, "NS" NPB benchmarks + synthetics.
        max_seq_len (int, optional): Upper bound on the padded sequence
            length.
        atomizer (optional): Atomizer class used to encode kernel sources.
        quiet (bool, optional): If True, suppress progress printouts.

    Returns:
        dict: {"dataframe": ..., "seq_length": ..., "atomizer": ...}

    Raises:
        ValueError: If `source` is not one of the recognized codes.
        Exception: If a kernel source does not start with
            "__kernel void A".
    """
    def get_benchmarks(platform):
        # Benchmark runtimes, annotated with suite name and synthetic=0.
        B = pd.read_csv(
            fs.path("runtimes/{platform}-benchmarks.csv".format(**vars())))
        B["source"] = [escape_suite_name(x) for x in B["benchmark"]]
        B["synthetic"] = [0] * len(B)
        return B

    def get_npb_benchmarks(platform):
        # Subset of the benchmarks belonging to the NPB suite.
        B = get_benchmarks(platform)
        msk = B["source"] == "NPB"
        return B[msk]

    def get_synthetics(platform):
        # CLgen-generated kernels, annotated with synthetic=1.
        S = pd.read_csv(
            fs.path("runtimes/{platform}-clgen.csv".format(**vars())))
        S["source"] = ["CLgen"] * len(S)
        S["synthetic"] = [1] * len(S)
        return S

    if source == "B":
        dataframe = get_benchmarks(platform)
    elif source == "S":
        dataframe = get_synthetics(platform)
    elif source == "BS":
        dataframe = pd.concat(
            (get_benchmarks(platform), get_synthetics(platform)))
    elif source == "N":
        dataframe = get_npb_benchmarks(platform)
    elif source == "NS":
        dataframe = pd.concat(
            (get_npb_benchmarks(platform), get_synthetics(platform)))
    else:
        # BUG FIX: the original raised a bare `Exception` with no message,
        # hiding which argument was invalid. ValueError subclasses
        # Exception, so existing `except Exception` handlers still work.
        raise ValueError("invalid source: '{}'".format(source))

    # Binary-encode the oracle label: GPU -> 1, CPU -> 0.
    dataframe["oracle_enc"] = [
        1 if x == "GPU" else 0 for x in dataframe["oracle"].values
    ]
    dataframe["benchmark_name"] = [
        escape_benchmark_name(b) for b in dataframe["benchmark"].values
    ]

    # load source code:
    source_dir = fs.path("kernels")
    srcs, benchmark_names = [], []
    for row in dataframe["benchmark"].values:
        inpath = fs.path(source_dir, row + ".cl")
        with open(inpath) as infile:
            src = infile.read()
        # All kernels are expected to have been rewritten to the
        # canonical "__kernel void A" entry point.
        if not src.startswith("__kernel void A"):
            print(fs.basename(inpath))
            raise Exception(src)
        srcs.append(src)
    dataframe["src"] = srcs
    dataframe["src_len"] = [len(s) for s in srcs]

    if not quiet:
        print("num instances {} ({} synthetic, {} benchmarks)".format(
            len(dataframe),
            sum(dataframe["synthetic"].values),
            len(dataframe) - sum(dataframe["synthetic"].values)))
        print("unique kernels", len(set(srcs)))

    # encode and pad sequences:
    atomizer = atomizer.from_text(''.join(dataframe["src"].values))

    seqs = [atomizer.atomize(seq) for seq in dataframe["src"].values]
    # Pad to the longest sequence, capped at max_seq_len.
    seq_length = min(max(len(s) for s in seqs), max_seq_len)
    # Pad value is one past the vocabulary, so it cannot collide with a
    # real token.
    pad_val = atomizer.vocab_size + 1
    dataframe["seq_len"] = [len(s) for s in seqs]
    dataframe["seq"] = list(
        pad_sequences(seqs, maxlen=seq_length, value=pad_val))

    if not quiet:
        print("vocab size", atomizer.vocab_size + 1)
        print("pad val", pad_val)
        print("padded seq length", seq_length)

    return {
        "dataframe": dataframe,
        "seq_length": seq_length,
        "atomizer": atomizer
    }
type=str, default="cc1", help="MySQL database hostname") args = parser.parse_args() db.init(args.hostname) with Session(commit=False) as s: # Export results # print("Exporting CLgen results ...") fs.mkdir("export/clgen/result") # Pick up where we left off done = set([ int(fs.basename(path)) for path in Path("export/clgen/result").iterdir() ]) print(len(done), "done") ids = set([x[0] for x in s.query(CLgenResult.id).all()]) print(len(ids), "in total") todo = ids - done print(len(todo), "todo") for result_id in ProgressBar()(todo): result = s.query(CLgenResult).filter( CLgenResult.id == result_id).scalar() with open(f"export/clgen/result/{result.id}", "w") as outfile: print(json.dumps({ "id":
def test_basename():
    """fs.basename returns the final component for both relative and absolute paths."""
    for candidate in ("foo", fs.abspath("foo")):
        assert fs.basename(candidate) == "foo"
def test_must_exist():
    """fs.must_exist returns existing paths unchanged and raises File404 otherwise."""
    with tempfile.NamedTemporaryFile(prefix='labm8_') as f:
        # Both the single-argument and component forms resolve to the file.
        assert f.name == fs.must_exist(f.name)
        assert f.name == fs.must_exist(fs.dirname(f.name), fs.basename(f.name))
    with pytest.raises(fs.File404):
        fs.must_exist("/not/a/real/path")