def _execute(command):
  """ Execute the given command in the current directory, print error messages if the command failed.
  Args:
    command Command to execute
  Returns:
    Whether the operation is a success
  """
  # Announce the command before running it (no newline: completion status follows)
  with tools.Context(None, "info"):
    sys.stdout.write("Executing " + repr(command) + "...")
    sys.stdout.flush()
  # Run the command with both output streams captured
  result = subprocess.run(shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  success = result.returncode == 0
  with tools.Context(None, "info" if success else "warning"):
    print(" done" if success else " fail (" + str(result.returncode) + ")")
    # Replay any captured output for post-mortem inspection
    for stream_name in ("stdout", "stderr"):
      captured = getattr(result, stream_name).decode("utf8")
      if captured:
        with tools.Context(stream_name, "trace"):
          print(captured)
  return success
def _loader_ctypes(so_path):
  """ Post-building ctypes loading operations.
  Args:
    so_path Shared object path
  """
  try:
    handle = ctypes.CDLL(str(so_path))
    # Register under the stem minus its first 3 characters (presumably strips a "lib" prefix — TODO confirm)
    register_py(so_path.stem[3:], lambda: handle)
  except Exception as err:
    # Best-effort loading: report the failure and carry on
    with tools.Context(so_path.stem, "warning"):
      print("Loading failed for python interface " + repr(str(so_path)) + ": " + str(err))
      with tools.Context("traceback", "trace"):
        traceback.print_exc()
def _loader_ops(so_path):
  """ Post-building custom ops loading operations.
  Args:
    so_path Shared object path
  """
  try:
    lib = tf.load_op_library(str(so_path))
    # First field of OP_LIST holds the repeated op-descriptor entries
    entries = lib.OP_LIST.ListFields()[0][1]
    # Drain the entries, registering each op under its snake_case name
    while len(entries) > 0:
      opname = _camel_to_snake(entries.pop().ListFields()[0][1])
      register_op(opname, getattr(lib, opname))
  except Exception as err:
    # Best-effort loading: report the failure and carry on
    with tools.Context(so_path.stem, "warning"):
      print("Loading failed for custom op " + repr(str(so_path)) + ": " + str(err))
      with tools.Context("traceback", "trace"):
        traceback.print_exc()
def _loader():
  """ Incrementally rebuild all libraries and register all local operations. """
  # Initialized before the try so the except handler can always reference it;
  # previously an exception raised before the directory loop (e.g. a missing
  # PATH variable) made the handler itself crash with a NameError on 'ident'
  ident = None
  try:
    # Check if the CUDA compiler is available anywhere on PATH
    nocuda = True
    for path in os.environ["PATH"].split(os.pathsep):
      if (pathlib.Path(path) / _build_cudabin).exists():
        nocuda = False
        break
    # List all common headers
    headers = []
    if _build_include is not None:
      for path in _build_include.iterdir():
        if path.suffix in _build_exts_hdr:
          headers.append(path)
    # Compile libraries and load OP
    doneset = set()
    failset = set()
    for dirpath in pathlib.Path(__file__).resolve().parent.iterdir():
      ident = dirpath.name[:3]
      if dirpath.is_dir() and ident in _loader_hooks:  # Is a library directory
        if dirpath not in doneset and dirpath not in failset:
          so_path = _build_library(dirpath, doneset, failset, headers, nocuda=nocuda)
          loader = _loader_hooks[ident]
          if so_path is not None and loader is not None:  # Successful build and loader needed
            loader(so_path)
  except Exception as err:
    with tools.Context(ident, "warning"):
      print("Loading failed while compiling " + repr(ident) + ": " + str(err))
      with tools.Context("traceback", "trace"):
        traceback.print_exc()
def _build_library(libpath, doneset, failset, headers, libstack=None, nocuda=False):
  """ (Re)build a library directory and its dependencies into their associated shared objects.
  Args:
    libpath  Library directory path
    doneset  Set of other, successfully built library directory paths to update
    failset  Set of other, not compiling library directory paths to update
    headers  List of shared header paths
    libstack Constant stack of dependent library directory paths (None for empty)
    nocuda   CUDA compiler was not found, don't try to compile these files
  Returns:
    Built library shared object path (None on failure)
  """
  with tools.Context(libpath.name, None):
    try:
      # Watch out for a dependency cycle.
      # NOTE: the stack default was a mutable [] mutated with '+=', so entries
      # leaked across calls and across sibling recursions, causing spurious
      # "dependency cycle" failures; a None sentinel plus a fresh concatenated
      # list keeps the stack truly constant for the caller, as documented.
      if libstack is None:
        libstack = []
      libpath = libpath.resolve()
      hascycle = libpath in libstack
      libstack = libstack + [libpath]  # New list: never mutate the caller's stack
      if hascycle:
        raise RuntimeError("dependency cycle found")
      # List dependencies and sources (per category) to build
      depends = []  # Library directory paths to build (some may already be built/not compile)
      shareds = []  # Shared object paths this library depends on
      headers = list(headers)  # Header paths (initially copy of common headers)
      srcscpu = []  # C++ source paths
      srcsgpu = []  # CUDA source paths
      libroot = libpath.parent
      for path in libpath.iterdir():
        try:
          path = path.resolve()
        except Exception:
          # A broken symlink means a missing dependency; anything else is ignored
          if path.is_symlink():
            raise RuntimeError("missing dependency " + repr(os.readlink(str(path))))
          continue  # Else silently ignore file
        if path.is_dir():
          if path.parent != libroot:  # Silently ignore directory
            continue
          if _build_check_ident(path):  # Is a valid dependency
            depends.append(path)
        else:
          if path.parent != libpath:  # Silently ignore file
            continue
          exts = path.suffixes
          if len(exts) > 0:
            if exts[-1] in _build_exts_hdr:
              headers.append(path)
              continue
            elif exts[-1] in _build_exts_cuda:
              srcsgpu.append(path)
              continue
            elif exts[-1] in _build_exts_src:
              # A compound extension like ".cu.cc" marks a CUDA-aware C++ source
              if len(exts) > 1 and exts[-2] in _build_exts_cuda:
                srcsgpu.append(path)
              else:
                srcscpu.append(path)
              continue
            elif exts[-1] in _build_exts_obj:
              continue
          tools.trace("Ignoring file " + repr(path.name) + ": no/unrecognized extension")
      if nocuda:  # No CUDA compiler => we ignore any CUDA source
        srcsgpu.clear()
      # Process dependencies first
      for path in depends:
        if path in failset:
          raise RuntimeError("dependency " + repr(path.name) + " could not be built")
        if path in doneset:
          so_path = _build_so_path(path)
        else:
          so_path = _build_library(path, doneset, failset, headers, libstack, nocuda=nocuda)
        if so_path is None:
          raise RuntimeError("dependency " + repr(path.name) + " could not be built")
        shareds.append(so_path)
      # Process sources second
      obj_paths = []  # Object paths to link
      for src_path in srcscpu:
        obj_path = pathlib.Path(str(src_path) + ".o")
        if _build_must_rebuild(obj_path, headers + [src_path]):
          if not _execute(_build_cpp_cmd(src_path, obj_path, len(srcsgpu) > 0)):
            raise RuntimeError("C++ source " + repr(src_path.name) + " did not compile")
        obj_paths.append(obj_path)
      for src_path in srcsgpu:
        obj_path = pathlib.Path(str(src_path) + ".o")
        if _build_must_rebuild(obj_path, headers + [src_path]):
          if not _execute(_build_cuda_cmd(src_path, obj_path)):
            raise RuntimeError("CUDA source " + repr(src_path.name) + " did not compile")
        obj_paths.append(obj_path)
      # (Re)link the shared object
      so_path = _build_so_path(libpath)
      if _build_must_rebuild(so_path, obj_paths):
        if not _execute(_build_link_cmd(obj_paths, shareds, so_path)):
          raise RuntimeError("final shared object " + repr(so_path.name) + " could not be linked")
      doneset.add(libpath)
      return so_path
    except Exception as err:
      # Any failure marks the library as unbuildable and is reported, not raised
      tools.warning("Build failed: " + str(err))
      failset.add(libpath)
      return None
# NOTE(review): the fragment below is a byte-duplicate of the interior of _loader
# (header listing onward, including a dangling `except` whose `try` is not visible
# here), followed by the module-level invocation of _loader(). It appears to be an
# extraction/overlap artifact; since its enclosing definition's start is not
# visible, the code is left byte-identical — verify against the full file.
if _build_include is not None: for path in _build_include.iterdir(): if path.suffix in _build_exts_hdr: headers.append(path) # Compile libraries and load OP doneset = set() failset = set() for dirpath in pathlib.Path(__file__).resolve().parent.iterdir(): ident = dirpath.name[:3] if dirpath.is_dir() and ident in _loader_hooks.keys( ): # Is a library directory if dirpath not in doneset and dirpath not in failset: so_path = _build_library(dirpath, doneset, failset, headers, nocuda=nocuda) loader = _loader_hooks[ident] if so_path is not None and loader is not None: # Successful build and loader needed loader(so_path) except Exception as err: with tools.Context(ident, "warning"): print("Loading failed while compiling " + repr(ident) + ": " + str(err)) with tools.Context("traceback", "trace"): traceback.print_exc() with tools.Context("native", None): _loader()
# NOTE(review): this fragment begins mid-docstring of a method whose `def` line
# (and enclosing class header) is not visible here, then shows an abstract
# `aggregate` method and the module-level GAR registry wiring. The enclosing
# class cannot be reconstructed from this view, so the code is left
# byte-identical — verify against the full file.
Args: nbworkers Total number of workers nbbyzwrks Declared number of Byzantine workers args Command line argument list """ raise NotImplementedError def aggregate(self, gradients): """ Build the gradient aggregation operation of the given gradients. Args: gradients Computed gradient tensors Returns: Aggregated gradient tensor """ raise NotImplementedError # ---------------------------------------------------------------------------- # # GAR script loader # Register instance _register = tools.ClassRegister("GAR") itemize = _register.itemize register = _register.register instantiate = _register.instantiate del _register # Load all local modules with tools.Context("aggregators", None): tools.import_directory(pathlib.Path(__file__).parent, globals())