Пример #1
0
def evaluate(model, sampler):
    """
    Evaluate sampling efficiency.

    Samples from the model, preprocesses the sampled kernel database, and
    collects kernel counts, rates and character counts.

    Arguments:
        model: Model to sample from.
        sampler: Sampler used to draw the sample.

    Returns:
        dict: Evaluation statistics plus the corpus, model and sampler
            cache paths.
    """
    print("starting sampling")
    sampler.sample(model)

    print("preprocessing sample")
    kernels_db = sampler.cache(model)["kernels.db"]
    preprocess.preprocess_db(kernels_db)

    # Kernel counts: all sampled, the good ones, and those with status=2.
    total = dbutil.num_rows_in(kernels_db, "ContentFiles")
    good = dbutil.num_good_kernels(kernels_db)
    ugly = dbutil.num_rows_in(
        kernels_db, "PreprocessedFiles", "WHERE status=2")

    # NOTE(review): both rates divide by the total kernel count, so an empty
    # sample raises ZeroDivisionError here.
    good_fraction = good / total
    # NOTE(review): "ugly_rate" is the complement of the status=2 fraction,
    # mirroring the discard rate formula - confirm that this is intended.
    ugly_fraction = ugly / total

    chars_total = dbutil.cc(kernels_db, "ContentFiles")
    chars_good = dbutil.cc(
        kernels_db, "PreprocessedFiles", condition="WHERE status=0")

    stats = {
        "argspec": sampler.kernel_opts["args"],
        "host": system.HOSTNAME,
        "date": time.nowstr(),
        "num_kernels": total,
        "num_good_kernels": good,
        "discard_rate": 1 - good_fraction,
        "ugly_rate": 1 - ugly_fraction,
        "total_charcount": chars_total,
        "good_charcount": chars_good,
        "corpus_dir": model.corpus.cache.path,
        "model_dir": model.cache.path,
        "sampler_dir": sampler.cache(model).path,
    }
    return stats
Пример #2
0
    def meta(self) -> dict:
        """
        Get trained model metadata.

        The result is a deep copy of the model options, extended with the
        CLgen version, the packaging date, the corpus metadata, and a
        mapping of cache file paths to their checksums.

        Format spec: https://github.com/ChrisCummins/clgen/issues/25

        Returns:
            dict: Metadata.
        """
        # Paths are keyed relative to the cache root. The root is escaped so
        # that regex metacharacters in the path (e.g. ".") match literally.
        cache_root_re = r'^' + re.escape(cache.ROOT) + '/'

        def checksum_dir(directory):
            """Map each file in directory (relative path) to its checksum."""
            return {
                re.sub(cache_root_re, "", path): clgen.checksum_file(path)
                for path in fs.ls(directory, abspaths=True)
            }

        # Corpus files first, then model files; a model entry replaces a
        # corpus entry if both produce the same relative path.
        contents = checksum_dir(self.corpus.cache.path)
        contents.update(checksum_dir(self.cache.path))

        # Deep copy so that the keys added below never leak into self.opts.
        _meta = deepcopy(self.opts)
        _meta["version"] = clgen.version()
        _meta["date_packaged"] = labtime.nowstr()
        # No trailing comma: store the corpus metadata itself rather than a
        # one-element tuple wrapping it.
        _meta["corpus"] = self.corpus.meta
        _meta["contents"] = contents

        return _meta
Пример #3
0
def evaluate(model, sampler):
    """
    Evaluate sampling efficiency.

    Draws a sample from the model, preprocesses the resulting kernel
    database, and reports counts, rates and character totals for it.

    Arguments:
        model: Model to sample from.
        sampler: Sampler used to draw the sample.

    Returns:
        dict: Evaluation statistics plus the corpus, model and sampler
            cache paths.
    """
    print("starting sampling")
    sampler.sample(model)

    print("preprocessing sample")
    db = sampler.cache(model)["kernels.db"]
    preprocess.preprocess_db(db)

    # Row counts for every sampled kernel, good kernels, and status=2 rows.
    n_all = dbutil.num_rows_in(db, "ContentFiles")
    n_good = dbutil.num_good_kernels(db)
    n_ugly = dbutil.num_rows_in(db, "PreprocessedFiles", "WHERE status=2")

    # NOTE(review): n_all == 0 (empty sample) raises ZeroDivisionError below.
    rate_discarded = 1 - (n_good / n_all)
    # NOTE(review): computed as the complement of the status=2 share, the
    # same shape as the discard rate - confirm that this is intended.
    rate_ugly = 1 - (n_ugly / n_all)

    cc_all = dbutil.cc(db, "ContentFiles")
    cc_good = dbutil.cc(db, "PreprocessedFiles", condition="WHERE status=0")

    result = dict()
    result["argspec"] = sampler.kernel_opts["args"]
    result["host"] = system.HOSTNAME
    result["date"] = time.nowstr()
    result["num_kernels"] = n_all
    result["num_good_kernels"] = n_good
    result["discard_rate"] = rate_discarded
    result["ugly_rate"] = rate_ugly
    result["total_charcount"] = cc_all
    result["good_charcount"] = cc_good
    result["corpus_dir"] = model.corpus.cache.path
    result["model_dir"] = model.cache.path
    result["sampler_dir"] = sampler.cache(model).path
    return result
Пример #4
0
def add_to_log(log, entry, name=None):
    """
    Append a dated record wrapping entry to log.

    Arguments:
        log: List-like object the record is appended to.
        entry: Payload stored under the "data" key.
        name: Optional label; stored under the "name" key only when truthy.
    """
    record = {"date": nowstr()}
    if name:
        record["name"] = name
    # "data" is inserted last so the key order stays date, name, data.
    record["data"] = entry
    log.append(record)
Пример #5
0
def add_to_log(log, entry, name=None):
    """
    Append a dated record wrapping entry to log.

    Arguments:
        log: List-like object the record is appended to.
        entry: Payload stored under the "data" key.
        name: Optional label; a falsy value leaves the "name" key out.
    """
    stamp = nowstr()
    if not name:
        # Unnamed record: only the date and the payload.
        log.append({"date": stamp, "data": entry})
        return
    log.append({"date": stamp, "name": name, "data": entry})