示例#1
0
def evaluate(model, sampler):
    """ evaluate sampling efficiency """
    print("starting sampling")
    sampler.sample(model)

    print("preprocessing sample")
    sample_db = sampler.cache(model)["kernels.db"]
    preprocess.preprocess_db(sample_db)

    num_kernels = dbutil.num_rows_in(sample_db, "ContentFiles")
    num_good_kernels = dbutil.num_good_kernels(sample_db)
    num_ugly_kernels = dbutil.num_rows_in(sample_db, "PreprocessedFiles",
                                          "WHERE status=2")
    discard_rate = 1 - (num_good_kernels / num_kernels)
    ugly_rate = 1 - (num_ugly_kernels / num_kernels)

    total_charcount = dbutil.cc(sample_db, "ContentFiles")
    good_charcount = dbutil.cc(sample_db,
                               "PreprocessedFiles",
                               condition="WHERE status=0")

    return {
        "argspec": sampler.kernel_opts["args"],
        "host": system.HOSTNAME,
        "date": time.nowstr(),
        "num_kernels": num_kernels,
        "num_good_kernels": num_good_kernels,
        "discard_rate": discard_rate,
        "ugly_rate": ugly_rate,
        "total_charcount": total_charcount,
        "good_charcount": good_charcount,
        "corpus_dir": model.corpus.cache.path,
        "model_dir": model.cache.path,
        "sampler_dir": sampler.cache(model).path,
    }
示例#2
0
def evaluate(model, sampler):
    """ evaluate sampling efficiency """
    print("starting sampling")
    sampler.sample(model)

    print("preprocessing sample")
    sample_db = sampler.cache(model)["kernels.db"]
    preprocess.preprocess_db(sample_db)

    num_kernels = dbutil.num_rows_in(sample_db, "ContentFiles")
    num_good_kernels = dbutil.num_good_kernels(sample_db)
    num_ugly_kernels = dbutil.num_rows_in(sample_db, "PreprocessedFiles",
                                          "WHERE status=2")
    discard_rate = 1 - (num_good_kernels / num_kernels)
    ugly_rate = 1 - (num_ugly_kernels / num_kernels)

    total_charcount = dbutil.cc(sample_db, "ContentFiles")
    good_charcount = dbutil.cc(sample_db, "PreprocessedFiles",
                               condition="WHERE status=0")

    return {
        "argspec": sampler.kernel_opts["args"],
        "host": system.HOSTNAME,
        "date": time.nowstr(),
        "num_kernels": num_kernels,
        "num_good_kernels": num_good_kernels,
        "discard_rate": discard_rate,
        "ugly_rate": ugly_rate,
        "total_charcount": total_charcount,
        "good_charcount": good_charcount,
        "corpus_dir": model.corpus.cache.path,
        "model_dir": model.cache.path,
        "sampler_dir": sampler.cache(model).path,
    }
示例#3
0
文件: benchmark.py 项目: SpringRi/phd
def evaluate(model, sampler):
    """ evaluate sampling efficiency """
    model.cache.empty()  # clear checkpoint cache
    print("starting training")
    tstart = time()  # start timer
    model.train()  # train model
    training_time = time() - tstart

    # clear the sample cache
    sampler.cache(model).empty()

    # sample kernels and time
    print("starting sampling")
    tstart = time()
    sampler.sample(model)
    tend = time()
    elapsed = tend - tstart

    # preprocess sample
    sample_db = sampler.cache(model)["kernels.db"]
    preprocess.preprocess_db(sample_db)

    num_kernels = dbutil.num_rows_in(sample_db, "ContentFiles")
    num_good_kernels = dbutil.num_good_kernels(sample_db)
    num_ugly_kernels = dbutil.num_rows_in(sample_db, "PreprocessedFiles",
                                          "WHERE status=2")
    discard_rate = 1 - (num_good_kernels / num_kernels)
    ugly_rate = 1 - (num_ugly_kernels / num_kernels)

    total_charcount = dbutil.cc(sample_db, "ContentFiles")
    good_charcount = dbutil.cc(sample_db,
                               "PreprocessedFiles",
                               condition="WHERE status=0")

    efficiency = good_charcount / total_charcount
    throughput = good_charcount / elapsed

    return {
        "training_time": training_time,
        "sampling_time": elapsed,
        "num_kernels": num_kernels,
        "num_good_kernels": num_good_kernels,
        "discard_rate": discard_rate,
        "ugly_rate": ugly_rate,
        "total_charcount": total_charcount,
        "good_charcount": good_charcount,
        "efficiency": efficiency,  # good_chars / total_chars
        "throughput": throughput,  # good_chars / second
        "corpus_dir": model.corpus.cache.path,
        "model_dir": model.cache.path,
        "sampler_dir": sampler.cache(model).path,
    }
示例#4
0
def evaluate(model, sampler):
    """ evaluate sampling efficiency """
    model.cache.empty()  # clear checkpoint cache
    print("starting training")
    tstart = time()  # start timer
    model.train()  # train model
    training_time = time() - tstart

    # clear the sample cache
    sampler.cache(model).empty()

    # sample kernels and time
    print("starting sampling")
    tstart = time()
    sampler.sample(model)
    tend = time()
    elapsed = tend - tstart

    # preprocess sample
    sample_db = sampler.cache(model)["kernels.db"]
    preprocess.preprocess_db(sample_db)

    num_kernels = dbutil.num_rows_in(sample_db, "ContentFiles")
    num_good_kernels = dbutil.num_good_kernels(sample_db)
    num_ugly_kernels = dbutil.num_rows_in(sample_db, "PreprocessedFiles",
                                          "WHERE status=2")
    discard_rate = 1 - (num_good_kernels / num_kernels)
    ugly_rate = 1 - (num_ugly_kernels / num_kernels)


    total_charcount = dbutil.cc(sample_db, "ContentFiles")
    good_charcount = dbutil.cc(sample_db, "PreprocessedFiles",
                               condition="WHERE status=0")

    efficiency = good_charcount / total_charcount
    throughput = good_charcount / elapsed

    return {
        "training_time": training_time,
        "sampling_time": elapsed,
        "num_kernels": num_kernels,
        "num_good_kernels": num_good_kernels,
        "discard_rate": discard_rate,
        "ugly_rate": ugly_rate,
        "total_charcount": total_charcount,
        "good_charcount": good_charcount,
        "efficiency": efficiency,  # good_chars / total_chars
        "throughput": throughput,  # good_chars / second
        "corpus_dir": model.corpus.cache.path,
        "model_dir": model.cache.path,
        "sampler_dir": sampler.cache(model).path,
    }
示例#5
0
def main(model,
         sampler,
         print_file_list=False,
         print_corpus_dir=False,
         print_model_dir=False,
         print_sampler_dir=False) -> None:
    """
    Main entry point for clgen.

    Arguments:
        model (str): Path to model.
        sample (str): Path to sampler.
        print_corpus_dir (bool, optional): If True, print cache path and exit.
        print_model_dir (bool, optional): If True, print cache path and exit.
        print_sampler_dir (bool, optional): If True, print cache path and exit.
    """
    import clgen.model
    import clgen.sampler
    from clgen import log

    model_json = load_json_file(model)
    model = clgen.model.from_json(model_json)

    sampler_json = load_json_file(sampler)
    sampler = clgen.sampler.from_json(sampler_json)

    # print cache paths
    if print_file_list:
        files = sorted(
            fs.ls(model.corpus.cache.path, abspaths=True, recursive=True) +
            fs.ls(model.cache.path, abspaths=True, recursive=True) +
            fs.ls(sampler.cache(model).path, abspaths=True, recursive=True))
        print('\n'.join(files))
        sys.exit(0)
    elif print_corpus_dir:
        print(model.corpus.cache.path)
        sys.exit(0)
    elif print_model_dir:
        print(model.cache.path)
        sys.exit(0)
    elif print_sampler_dir:
        print(sampler.cache(model).path)
        sys.exit(0)

    model.train()
    sampler.sample(model)
argspec = ['__global float*', '__global float*', '__global float*', 'const int']
sampler = clgen.sampler.from_json({
        "kernels": { 
            "args": argspec,
            "max_length": 1000
        },
        "sampler": {
            "batch_size": 100,
            "max_kernels": 15
        }
    })

print("Seed text:", clgen.sampler.serialize_argspec(argspec), "\n")
sampler.cache(model).empty()
sampler.sample(model)

db = sampler.cache(model)["kernels.db"]
num_good_kernels = clgen.dbutil.num_good_kernels(db)
clgen.explore.explore(db)

conn = sqlite3.connect(db)
print(conn)
c = conn.cursor()

#c.execute("SELECT * FROM sqlite_master;")
c.execute("SELECT * FROM PreprocessedFiles WHERE status=0;")
rows = c.fetchall()

if not(os.path.isdir("generatedkernels")):
    os.mkdir("./generatedkernels")