Exemplo n.º 1
0
def get_all_sampler_datasets():
    """
    Collect the kernels.db files of every sampler in the CLgen cache.

    Each entry of ~/.cache/clgen is checked for a "sampler" directory. Every
    entry inside such a directory that contains a "kernels.db" file
    contributes the path of that file.

    Returns:
        list: Paths of the kernels.db files that were found.
    """
    # First pass: gather the contents of each version's sampler directory.
    candidates = []
    for cache_entry in fs.ls(fs.path("~/.cache/clgen"), abspaths=True):
        sampler_root = fs.path(cache_entry, "sampler")
        if fs.isdir(sampler_root):
            candidates.extend(fs.ls(sampler_root, abspaths=True))

    # Second pass: keep the database path of every candidate that has one.
    db_paths = (fs.path(candidate, "kernels.db") for candidate in candidates)
    return [db_path for db_path in db_paths if fs.isfile(db_path)]
Exemplo n.º 2
0
def read_file(*components, **kwargs):
    """
    Load a JSON data blob.

    Arguments:
        *components (str): Path components of the file to read.
        must_exist (bool, optional): If False, return an empty dict when the
            file cannot be opened. Defaults to True.

    Returns:
        array or dict: JSON data.

    Raises:
        ValueError: If the JSON is malformed.
        IOError: If the file cannot be read and must_exist is True.

    When must_exist is True the path is first checked with fs.must_exist(),
    which presumably raises its own error for a missing path (confirm against
    the fs module).
    """
    must_exist = kwargs.get("must_exist", True)

    if must_exist:
        path = fs.must_exist(*components)
    else:
        path = fs.path(*components)

    try:
        with open(path) as infile:
            return loads(infile.read())
    except ValueError as e:
        raise ValueError(
            "malformed JSON file '{path}'. Message from parser: {err}".format(
                path=fs.basename(path), err=str(e)))
    except IOError:
        if not must_exist:
            return {}
        # Propagate the failure. Handing the exception object back to the
        # caller as if it were JSON data would hide the error.
        raise
Exemplo n.º 3
0
def main():
    """
    Export CLSmith programs of one classification with the header inlined.

    Command line arguments:
        classification: Only programs that have a result with this
            classification are exported.
        outdir: Directory that receives one "<program id>.cl" file per
            program. Files that already exist are left untouched.
    """
    parser = ArgumentParser(description=__description__)
    for positional in ("classification", "outdir"):
        parser.add_argument(positional)
    args = parser.parse_args()

    db.init("cc1")
    session = db.make_session()

    matching = session.query(sql.distinct(CLSmithResult.program_id)) \
        .filter(CLSmithResult.classification == args.classification)
    program_ids = [row[0] for row in matching.all()]

    header = fs.read_file(dsmith.data_path("include", "clsmith.h"))

    fs.mkdir(args.outdir)

    for program_id in ProgressBar()(program_ids):
        outpath = fs.path(args.outdir, program_id + ".cl")

        # Skip programs exported by an earlier run.
        if fs.exists(outpath):
            continue

        program = session.query(CLSmithProgram) \
            .filter(CLSmithProgram.id == program_id).one()

        # Replace the include directive with the contents of the header.
        pre, post = program.src.split('#include "CLSmith.h"')

        with open(outpath, "w") as outfile:
            print(pre + header + post, file=outfile)
Exemplo n.º 4
0
def echo(*args, **kwargs):
  """
  Print a message into a file.

  Arguments:
      args: The parts of the message, followed by the path of the file to
          write to as the final argument.
      append (bool, optional): If True, add to the end of the file instead
          of overwriting it. Defaults to False.
      **kwargs: Remaining keyword arguments are forwarded to print().
  """
  append = kwargs.pop("append", False)
  path = fs.path(args[-1])

  # Only the open mode (and one extra path normalisation) differs between
  # appending and overwriting.
  if append:
    mode, target = "a", path
  else:
    mode, target = "w", fs.path(path)

  with open(target, mode) as outfile:
    print(*args[:-1], file=outfile, **kwargs)
Exemplo n.º 5
0
def root_path(*path) -> Path:
    """
    Build a path below ROOT, the dsmith source repository.

    Arguments:
        *path (List[str]): Path components appended after ROOT.

    Returns:
        Path: The result of joining ROOT and the components with fs.path().
    """
    components = (ROOT,) + path
    return fs.path(*components)
Exemplo n.º 6
0
Arquivo: cache.py Projeto: BeauJoh/phd
    def keypath(self, key):
        """
        Return the filesystem path that belongs to a key.

        The key is passed through self.escape_key() and joined onto
        self.path.

        Arguments:
            key: Key.

        Returns:
            str: Path for the key.
        """
        root = self.path
        filename = self.escape_key(key)
        return fs.path(root, filename)
Exemplo n.º 7
0
def data_path(*path) -> Path:
    """
    Locate a packaged data file.

    Arguments:
        *path (List[str]): Path components below the package's "data"
            directory.

    Returns:
        Path: The value of resource_filename() for that resource.
    """
    relative = fs.path("data", *path)
    return resource_filename(__name__, relative)
Exemplo n.º 8
0
def cachepath(*relative_path_components: str) -> pathlib.Path:
    """Return a path inside the file system cache, creating the cache root.

    The cache root is taken from the CLGEN_CACHE environment variable and
    falls back to "~/.cache/clgen/".

    Args:
      *relative_path_components: Path components below the cache root.

    Returns:
      The cache root joined with the given components by fs.path().
    """
    configured_root = os.environ.get("CLGEN_CACHE", "~/.cache/clgen/")
    cache_root = pathlib.Path(configured_root)

    # Ensure the (user-expanded) cache root exists on disk.
    expanded_root = cache_root.expanduser()
    expanded_root.mkdir(parents=True, exist_ok=True)

    # The unexpanded root is what gets handed to fs.path().
    joined = fs.path(cache_root, *relative_path_components)
    return pathlib.Path(joined)
Exemplo n.º 9
0
def test_rmtrash():
    """
    Exercise fs.rmtrash() on a single file and on a directory tree.
    """
    with tempfile.NamedTemporaryFile(prefix='labm8_') as f:
        assert fs.isfile(f.name)
        fs.rmtrash(f.name)
        assert not fs.isfile(f.name)
        # Both removal functions are called again on the now-missing path.
        fs.rmtrash(f.name)
        fs.rm(f.name)
    with tempfile.TemporaryDirectory() as d:
        fs.rm(d)
        fs.mkdir(d, "foo/bar")
        system.echo("Hello, world!", fs.path(d, "foo/bar/baz"))
        # The file was written below the directory `d`, so check there.
        # The temp-file object `f` from the first block is not a path.
        assert fs.isfile(d, "foo/bar/baz")
        fs.rmtrash(d)
        assert not fs.isfile(d, "foo/bar/baz")
        assert not fs.isdir(d)
Exemplo n.º 10
0
Arquivo: make.py Projeto: BeauJoh/phd
def make(target="all", dir=".", **kwargs):
    """
    Run make.

    Arguments:

        target (str, optional): Name of the target to build. Defaults
          to "all".
        dir (str, optional): Path to directory containing Makefile.
        **kwargs (optional): Any additional arguments to be passed to
          system.run(). A "timeout" of 300 is supplied unless the caller
          provides one.

    Returns:

        (int, str, str): The first element is the return code of the
          make command. The second and third elements are the stdout
          and stderr of the process.

    Raises:

        NoMakefileError: In case a Makefile is not found in the target
          directory.
        NoTargetError: In case the Makefile does not support the
          requested target.
        MakeError: In case the target rule fails.
    """
    if not fs.isfile(fs.path(dir, "Makefile")):
        raise NoMakefileError("No makefile in '{}'".format(fs.abspath(dir)))

    # Default parameters to system.run()
    kwargs.setdefault("timeout", 300)

    fs.cd(dir)
    try:
        ret, out, err = system.run(["make", target], **kwargs)
    finally:
        # Undo the directory change even if system.run() raises, so a failed
        # invocation does not leave the process in `dir`.
        fs.cdpop()

    if ret > 0:
        # Tell "no such target" apart from a rule that ran and failed.
        if re.search(_BAD_TARGET_RE, err):
            raise NoTargetError("No rule for target '{}'".format(target))
        raise MakeError("Target '{}' failed".format(target))

    return ret, out, err
Exemplo n.º 11
0
def data_path(*components, exists=True) -> str:
    """
    Build the path of a unittest data file.

    Data files live in the "data" directory next to this module.

    Arguments:
        *components (str): Relative path.
        exists (bool, optional): If True, require that the file exists.

    Returns:
        str: Path below this module's "data" directory.

    Raises:
        Data404: If the path doesn't exist and 'exists' is True.
    """
    relative = fs.path(*components)
    data_root = os.path.join(os.path.dirname(__file__), "data")
    abspath = os.path.join(data_root, relative)

    if not exists or os.path.exists(abspath):
        return abspath
    raise Data404(abspath)
Exemplo n.º 12
0
Arquivo: llvm.py Projeto: BeauJoh/phd
import re

import subprocess

from phd.lib.labm8 import fs

_LINE_RE = re.compile("^(?P<count>\d+) instcount - Number of (?P<type>.+)")

# Default location of the LLVM binaries, built with fs.path() from a
# "~"-prefixed path.
DEFAULT_LLVM_PATH = fs.path(
    "~/src/msc-thesis/skelcl/libraries/llvm/build/bin/")


class Error(Exception):
    """Base class for errors raised by the LLVM module."""


class ProgramNotFoundError(Error):
    """Raised when a required program cannot be found."""


class ClangError(Error):
    """Raised when clang exits with a non-zero status."""
Exemplo n.º 13
0
def test_path_homedir():
    """fs.path() resolves a leading "~" like os.path.expanduser() does."""
    home = os.path.expanduser("~")
    assert home == fs.path("~")

    expected = os.path.join(os.path.expanduser("~"), "foo")
    assert expected == fs.path("~", "foo")
Exemplo n.º 14
0
import dbus.service

from phd.lib.labm8 import fs

# Per-user ".omnitune" directory, built with fs.path() from a "~"-prefixed
# path.
LOCAL_DIR = fs.path("~/.omnitune")


class Error(Exception):
    """Module-level exception type."""


class Server(dbus.service.Object):
    """D-Bus service object that adds nothing to dbus.service.Object here."""
Exemplo n.º 15
0
 def get_benchmarks(platform):
     """Read the benchmark runtimes CSV of a platform and label its rows.

     Adds a "source" column derived from the "benchmark" column and a
     "synthetic" column that is 0 for every row.
     """
     csv_name = "runtimes/{platform}-benchmarks.csv".format(platform=platform)
     frame = pd.read_csv(fs.path(csv_name))
     frame["source"] = [escape_suite_name(name) for name in frame["benchmark"]]
     frame["synthetic"] = [0] * len(frame)
     return frame
Exemplo n.º 16
0
class Server(omnitune.Server):
    """
    SkelCL server: D-Bus methods for requesting stencil work group sizes and
    for recording or refusing measured parameter values.
    """
    LLVM_PATH = fs.path("~/src/msc-thesis/skelcl/libraries/llvm/build/bin/")

    def __init__(self, *args, **kwargs):
        """
        Construct a SkelCL server.

        Arguments are forwarded unchanged to the parent constructor.
        """
        # Fail if we can't find the path
        if not fs.isdir(self.LLVM_PATH):
            io.fatal("Could not find llvm path '{0}'".format(self.LLVM_PATH))

        super(Server, self).__init__(*args, **kwargs)
        io.info("Registered server %s/SkelCLServer ..." % SESSION_NAME)

        # Setup persistent database.
        self.db = migrate(Database())
        self.db.status_report()

        # Create an in-memory sample strategy cache.
        self.strategies = cache.TransientCache()

    # NOTE(review): this in_signature declares eight integers followed by
    # three strings, which does not appear to line up with the parameter
    # order (seven integers, three strings, then the integer max_wg_size, as
    # AddStencilRuntime declares it). Left unchanged because clients may
    # depend on the published signature. Confirm before changing.
    @dbus.service.method(INTERFACE_NAME,
                         in_signature='siiiiiiiisss',
                         out_signature='(nn)')
    def RequestTrainingStencilParams(self, device_name, device_count, north,
                                     south, east, west, data_width,
                                     data_height, type_in, type_out, source,
                                     max_wg_size):
        """
        Request training parameter values for a SkelCL stencil operation.

        Determines the parameter values to use for a SkelCL stencil
        operation by iterating over the space of parameter values.

        Args:

            device_name (str): The name of the execution device.
            device_count (int): The number of execution devices.
            north (int): The stencil shape north direction.
            south (int): The stencil shape south direction.
            east (int): The stencil shape east direction.
            west (int): The stencil shape west direction.
            data_width (int): The number of columns of data.
            data_height (int): The number of rows of data.
            type_in (str): The input data type.
            type_out (str): The output data type.
            source (str): The stencil kernel source code.
            max_wg_size (int): The maximum kernel workgroup size.

        Returns:
            A tuple of work group size values, e.g.

            (16,32)
        """
        # Parse arguments. Only max_wg_size is used below. The remaining
        # conversions are kept so that malformed arguments still fail here.
        device_name = util.parse_str(device_name)
        device_count = int(device_count)
        north = int(north)
        south = int(south)
        east = int(east)
        west = int(west)
        data_width = int(data_width)
        data_height = int(data_height)
        type_in = util.parse_str(type_in)
        type_out = util.parse_str(type_out)
        source = util.parse_str(source)
        max_wg_size = int(max_wg_size)

        # Pick the work group size to train with from max_wg_size.
        wg = training.random_wg_value(max_wg_size)

        return wg

    # NOTE(review): same in_signature question as on
    # RequestTrainingStencilParams. Confirm before changing.
    @dbus.service.method(INTERFACE_NAME,
                         in_signature='siiiiiiiisss',
                         out_signature='(nn)')
    def RequestStencilParams(self, device_name, device_count, north, south,
                             east, west, data_width, data_height, type_in,
                             type_out, source, max_wg_size):
        """
        Request parameter values for a SkelCL stencil operation.

        The intended behaviour is to predict the optimal parameter values
        with a machine learning classifier from features of the arguments.
        That is still a TODO: a fixed value is returned for now.

        Args:

            device_name (str): The name of the execution device.
            device_count (int): The number of execution devices.
            north (int): The stencil shape north direction.
            south (int): The stencil shape south direction.
            east (int): The stencil shape east direction.
            west (int): The stencil shape west direction.
            data_width (int): The number of columns of data.
            data_height (int): The number of rows of data.
            type_in (str): The input data type.
            type_out (str): The output data type.
            source (str): The stencil kernel source code.
            max_wg_size (int): The maximum kernel workgroup size.

        Returns:
            A tuple of work group size values, e.g.

            (16,32)
        """

        start_time = time.time()

        # Parse arguments. The values are not used yet (see TODO below), but
        # the conversions still reject malformed arguments.
        device_name = util.parse_str(device_name)
        device_count = int(device_count)
        north = int(north)
        south = int(south)
        east = int(east)
        west = int(west)
        data_width = int(data_width)
        data_height = int(data_height)
        source = util.parse_str(source)
        max_wg_size = int(max_wg_size)

        # TODO: Perform feature extraction & classification
        wg = (64, 32)

        end_time = time.time()

        io.debug(("RequestStencilParams() -> "
                  "({c}, {r}) [{t:.3f}s]".format(c=wg[0],
                                                 r=wg[1],
                                                 t=end_time - start_time)))

        return wg

    @dbus.service.method(INTERFACE_NAME,
                         in_signature='siiiiiiisssiiid',
                         out_signature='')
    def AddStencilRuntime(self, device_name, device_count, north, south, east,
                          west, data_width, data_height, type_in, type_out,
                          source, max_wg_size, wg_c, wg_r, runtime):
        """
        Add a new stencil runtime.

        Args:

            device_name (str): The name of the execution device.
            device_count (int): The number of execution devices.
            north (int): The stencil shape north direction.
            south (int): The stencil shape south direction.
            east (int): The stencil shape east direction.
            west (int): The stencil shape west direction.
            data_width (int): The number of columns of data.
            data_height (int): The number of rows of data.
            type_in (str): The input data type.
            type_out (str): The output data type.
            source (str): The stencil kernel source code.
            max_wg_size (int): The maximum kernel workgroup size.
            wg_c (int): The workgroup size used (columns).
            wg_r (int): The workgroup size used (rows).
            runtime (double): The measured runtime in milliseconds.

        """
        # Parse arguments.
        device_name = util.parse_str(device_name)
        device_count = int(device_count)
        north = int(north)
        south = int(south)
        east = int(east)
        west = int(west)
        data_width = int(data_width)
        data_height = int(data_height)
        type_in = util.parse_str(type_in)
        type_out = util.parse_str(type_out)
        source = util.parse_str(source)
        max_wg_size = int(max_wg_size)
        wg_c = int(wg_c)
        wg_r = int(wg_r)
        runtime = float(runtime)

        # Lookup IDs
        device = self.db.device_id(device_name, device_count)
        kernel = self.db.kernel_id(north, south, east, west, max_wg_size,
                                   source)
        dataset = self.db.datasets_id(data_width, data_height, type_in,
                                      type_out)
        scenario = self.db.scenario_id(device, kernel, dataset)
        params = self.db.params_id(wg_c, wg_r)

        # Add entry into runtimes table.
        self.db.add_runtime(scenario, params, runtime)
        self.db.commit()

        io.debug(("AddStencilRuntime({scenario}, {params}, {runtime})".format(
            scenario=scenario[:8], params=params, runtime=runtime)))

    @dbus.service.method(INTERFACE_NAME,
                         in_signature='siiiiiiisssiii',
                         out_signature='')
    def RefuseStencilParams(self, device_name, device_count, north, south,
                            east, west, data_width, data_height, type_in,
                            type_out, source, max_wg_size, wg_c, wg_r):
        """
        Mark a set of parameters as bad.

        Args:

            device_name (str): The name of the execution device.
            device_count (int): The number of execution devices.
            north (int): The stencil shape north direction.
            south (int): The stencil shape south direction.
            east (int): The stencil shape east direction.
            west (int): The stencil shape west direction.
            data_width (int): The number of columns of data.
            data_height (int): The number of rows of data.
            type_in (str): The input data type.
            type_out (str): The output data type.
            source (str): The stencil kernel source code.
            max_wg_size (int): The maximum kernel workgroup size.
            wg_c (int): The workgroup size used (columns).
            wg_r (int): The workgroup size used (rows).

        """
        # Parse arguments.
        device_name = util.parse_str(device_name)
        device_count = int(device_count)
        north = int(north)
        south = int(south)
        east = int(east)
        west = int(west)
        data_width = int(data_width)
        data_height = int(data_height)
        type_in = util.parse_str(type_in)
        type_out = util.parse_str(type_out)
        source = util.parse_str(source)
        max_wg_size = int(max_wg_size)
        wg_c = int(wg_c)
        wg_r = int(wg_r)

        # Lookup IDs
        device = self.db.device_id(device_name, device_count)
        kernel = self.db.kernel_id(north, south, east, west, max_wg_size,
                                   source)
        dataset = self.db.datasets_id(data_width, data_height, type_in,
                                      type_out)
        scenario = self.db.scenario_id(device, kernel, dataset)
        params = self.db.params_id(wg_c, wg_r)

        # Record the refusal of these parameters for the scenario.
        self.db.refuse_params(scenario, params)
        self.db.commit()

        # This method has no runtime argument, so only the scenario and the
        # parameters are passed to the format call.
        io.debug(("RefuseStencilParams({scenario}, {params})".format(
            scenario=scenario[:8], params=params)))
Exemplo n.º 17
0
class TestLLVM(TestCase):
    """Tests for the llvm module, run against the binaries in LLVM_PATH."""

    LLVM_PATH = fs.path("~/src/msc-thesis/skelcl/libraries/llvm/build/bin/")

    # assert_program_exists():
    def test_assert_program_exists(self):
        """An existing file passes the check and yields None."""
        outcome = llvm.assert_program_exists(__file__)
        self._test(None, outcome)

    def test_assert_program_exists_fail(self):
        """A path that does not exist is reported as a missing program."""
        with self.assertRaises(llvm.ProgramNotFoundError):
            llvm.assert_program_exists("/not a real path")

    # bitcode()
    def test_bitcode_cl(self):
        """The gaussian stencil kernel compiles to the expected bitcode."""
        expected = self.stencil_gaussian_kernel_bc
        actual = llvm.bitcode(self.stencil_gaussian_kernel,
                              language="cl",
                              path=self.LLVM_PATH)
        self._test(expected, actual)

    def test_bitcode_error_bad_src(self):
        """Source that is not real code raises ClangError."""
        with self.assertRaises(llvm.ClangError):
            llvm.bitcode("<NOT REAL CODE>", path=self.LLVM_PATH)

    def test_bitcode_error_bad_lang(self):
        """An unknown language name raises ClangError."""
        with self.assertRaises(llvm.ClangError):
            llvm.bitcode(self.stencil_gaussian_kernel,
                         language="foobar",
                         path=self.LLVM_PATH)

    def test_bitcode_missing_clang(self):
        """A bogus binary directory raises ProgramNotFoundError."""
        with self.assertRaises(llvm.ProgramNotFoundError):
            llvm.bitcode("", path="/not a real path")

    # parse_instcounts()
    def test_parse_isntcounts(self):
        """Instruction count output parses to the expected JSON data."""
        expected = self.stencil_gaussian_kernel_ic_json
        actual = llvm.parse_instcounts(self.stencil_gaussian_kernel_ic)
        self._test(expected, actual)

    def test_parse_isntcounts_empty(self):
        """Empty output parses to an empty dict."""
        parsed = llvm.parse_instcounts("")
        self._test({}, parsed)

    # instcounts()
    def test_instcounts_cl(self):
        """Bitcode of the gaussian kernel gives the expected counts."""
        expected = self.stencil_gaussian_kernel_ic_json
        actual = llvm.instcounts(self.stencil_gaussian_kernel_bc,
                                 path=self.LLVM_PATH)
        self._test(expected, actual)

    def test_instcounds_missing_opt(self):
        """A bogus binary directory raises ProgramNotFoundError."""
        with self.assertRaises(llvm.ProgramNotFoundError):
            llvm.instcounts("", path="/not a real path")

    # instcounts2ratios()
    def test_instcounts2ratios(self):
        """Instruction counts convert to the expected ratios."""
        expected = self.stencil_gaussian_kernel_ratios_json
        actual = llvm.instcounts2ratios(self.stencil_gaussian_kernel_ic_json)
        self._test(expected, actual)
Exemplo n.º 18
0
def load_data_desc(platform,
                   source="B",
                   max_seq_len=1000,
                   atomizer=CharacterAtomizer,
                   quiet=False):
    """
    Load experimental results and the encoded kernel sources they refer to.

    Arguments:
        platform (str): Platform name, substituted into the names of the
            runtimes CSV files.
        source (str, optional): Which rows to load: "B" (benchmarks),
            "S" (synthetics), "N" (benchmarks whose source is "NPB"), or the
            combinations "BS" and "NS".
        max_seq_len (int, optional): Upper bound on the padded sequence
            length.
        atomizer (optional): Object whose from_text() builds the atomizer
            used to encode the kernel sources.
        quiet (bool, optional): If True, do not print summary statistics.

    Returns:
        dict: The keys "dataframe", "seq_length" and "atomizer".

    Raises:
        ValueError: If source is not one of the supported values.
        Exception: If a kernel source does not start with "__kernel void A".
    """
    def get_benchmarks(platform):
        B = pd.read_csv(
            fs.path("runtimes/{platform}-benchmarks.csv".format(
                platform=platform)))
        B["source"] = [escape_suite_name(x) for x in B["benchmark"]]
        B["synthetic"] = [0] * len(B)
        return B

    def get_npb_benchmarks(platform):
        B = get_benchmarks(platform)
        msk = B["source"] == "NPB"
        return B[msk]

    def get_synthetics(platform):
        S = pd.read_csv(
            fs.path("runtimes/{platform}-clgen.csv".format(
                platform=platform)))
        S["source"] = ["CLgen"] * len(S)
        S["synthetic"] = [1] * len(S)
        return S

    # Loaders to run, in concatenation order, for each supported source.
    loaders = {
        "B": (get_benchmarks,),
        "S": (get_synthetics,),
        "BS": (get_benchmarks, get_synthetics),
        "N": (get_npb_benchmarks,),
        "NS": (get_npb_benchmarks, get_synthetics),
    }
    if source not in loaders:
        raise ValueError(
            "unsupported source '{}', expected one of: {}".format(
                source, ", ".join(sorted(loaders))))

    frames = [load(platform) for load in loaders[source]]
    # A single table is used as-is; only combinations are concatenated.
    dataframe = frames[0] if len(frames) == 1 else pd.concat(frames)

    dataframe["oracle_enc"] = [
        1 if x == "GPU" else 0 for x in dataframe["oracle"].values
    ]
    dataframe["benchmark_name"] = [
        escape_benchmark_name(b) for b in dataframe["benchmark"].values
    ]

    # load source code:
    source_dir = fs.path("kernels")
    srcs = []
    for row in dataframe["benchmark"].values:
        inpath = fs.path(source_dir, row + ".cl")
        with open(inpath) as infile:
            src = infile.read()
        if not src.startswith("__kernel void A"):
            # Name the offending file before failing with its contents.
            print(fs.basename(inpath))
            raise Exception(src)
        srcs.append(src)
    dataframe["src"] = srcs
    dataframe["src_len"] = [len(s) for s in srcs]

    if not quiet:
        num_synthetic = sum(dataframe["synthetic"].values)
        print("num instances {} ({} synthetic, {} benchmarks)".format(
            len(dataframe), num_synthetic, len(dataframe) - num_synthetic))
        print("unique kernels", len(set(srcs)))

    # encode and pad sequences:
    atomizer = atomizer.from_text(''.join(dataframe["src"].values))

    seqs = [atomizer.atomize(seq) for seq in dataframe["src"].values]
    seq_length = min(max(len(s) for s in seqs), max_seq_len)
    pad_val = atomizer.vocab_size + 1
    dataframe["seq_len"] = [len(s) for s in seqs]
    dataframe["seq"] = list(
        pad_sequences(seqs, maxlen=seq_length, value=pad_val))

    if not quiet:
        print("vocab size", atomizer.vocab_size + 1)
        print("pad val", pad_val)
        print("padded seq length", seq_length)

    return {
        "dataframe": dataframe,
        "seq_length": seq_length,
        "atomizer": atomizer
    }
Exemplo n.º 19
0
def test_path():
    """fs.path() joins its components with "/"."""
    cases = (
        ("foo/bar", ("foo", "bar")),
        ("foo/bar/car", ("foo/bar", "car")),
    )
    for expected, components in cases:
        assert expected == fs.path(*components)
Exemplo n.º 20
0
 def get_synthetics(platform):
     """Read the CLgen runtimes CSV of a platform and label its rows.

     Every row gets the source "CLgen" and a "synthetic" value of 1.
     """
     csv_name = "runtimes/{platform}-clgen.csv".format(platform=platform)
     frame = pd.read_csv(fs.path(csv_name))
     row_count = len(frame)
     frame["source"] = ["CLgen"] * row_count
     frame["synthetic"] = [1] * row_count
     return frame
Exemplo n.º 21
0
from experimental.dsmith import Colors

# Distinct static types for a runtime (float) and a status (int).
runtime_t = NewType('runtime_t', float)
status_t = NewType('status_t', int)
# Result record with the fields runtime, status, stdout and stderr.
return_t = namedtuple('return_t', ['runtime', 'status', 'stdout', 'stderr'])

# Build paths: CLSmith binaries and runtime directory below
# third_party/clsmith, resolved through dsmith.root_path().
exec_path = dsmith.root_path("third_party", "clsmith", "build", "CLSmith")
cl_launcher_path = dsmith.root_path("third_party", "clsmith", "build",
                                    "cl_launcher")
include_path = dsmith.root_path("third_party", "clsmith", "runtime")

# Sanity checks: both binaries must pass fs.isexe() and CLSmith.h must be
# present in the include directory. These are plain asserts, so they are
# skipped when Python runs with -O.
assert fs.isexe(exec_path)
assert fs.isexe(cl_launcher_path)
assert fs.isfile(fs.path(include_path, "CLSmith.h"))


def clsmith_cli(*args, timeout: int = 60, exec_path=exec_path) -> List[str]:
    """
    Build the command line for running CLSmith under timeout(1).

    Arguments:
        *args: Arguments appended after the CLSmith executable.
        timeout (int, optional): Duration passed to the timeout command.
        exec_path (optional): Path of the CLSmith executable.

    Returns:
        List[str]: "timeout --signal=9 <timeout> <exec_path>" followed by
            the given arguments.
    """
    command = ["timeout", "--signal=9", str(timeout), exec_path]
    command.extend(args)
    return command


def clsmith(*args, exec_path=exec_path) -> return_t:
    """
      Returns:
          return_t: A named tuple consisting of runtime (float),
              status (int), stdout (str), and stderr (str).
  """
    start_time = time()

    cli = clsmith_cli(*args)