def append(caf, targets: 'TARGET', queue: 'URL', maxdepth: ('--maxdepth', int)):
    """
    Append the list of prepared tasks to a given queue.

    Usage:
        caf append URL [TARGET...] [--maxdepth N]

    Options:
        --maxdepth N             Maximum depth.
    """
    from urllib.request import urlopen
    url = caf.get_queue_url(queue, 'append') or queue
    roots = [caf.out/t for t in targets] if targets else caf.out.glob('*')
    tasks = OrderedDict()
    for path in find_tasks(*roots, unsealed=True, maxdepth=maxdepth):
        cellarid = get_stored(path)
        if cellarid not in tasks:
            tasks[cellarid] = path
    if not tasks:
        error('No tasks to submit')
    data = '\n'.join('{} {}'.format(label, h)
                     for h, label in reversed(tasks.items())).encode()
    with urlopen(url, data=data) as r:
        queue_url = r.read().decode()
    print('./caf work --queue {}'.format(queue_url))
    with open('.caf/LAST_QUEUE', 'w') as f:
        f.write(queue_url)
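# A hedged sketch (not part of the original source) of the payload that
# `append` POSTs to the queue URL: one "<task path> <cellar hash>" pair per
# line, emitted in reversed discovery order (paths and hashes below are
# hypothetical):
#
#     build/target1/task_b 9c07f1a2...
#     build/target1/task_a 3f2ab8e1...
#
# The server is assumed to respond with the URL of the newly created queue,
# which is echoed back as a ready-to-paste `./caf work --queue URL` command.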
def get_hashes(self):
    """Get hashes of a task's dependencies.

    Dependencies consist of all input files and of the locks of children.
    """
    with cd(self.path):
        hashes = {}
        for dirpath, dirnames, filenames in os.walk('.'):
            if dirpath == '.':
                dirnames[:] = [name for name in dirnames
                               if name not in ['.caf'] + list(self.links)]
            for name in filenames:
                filepath = Path(dirpath)/name
                if filepath.is_symlink():
                    target = os.readlink(str(filepath))
                    if Path(target).is_absolute():
                        error('Cannot link to absolute paths in tasks')
                    if str(filepath) in self.files:
                        # stored files are hashed by their contents
                        with cd(filepath.parent):
                            hashes[str(filepath)] = get_file_hash(Path(target))
                    else:
                        # other symlinks are hashed by their target path
                        hashes[str(filepath)] = target
                else:
                    make_nonwritable(filepath)
                    hashes[str(filepath)] = get_file_hash(filepath)
        for linkname in self.links:
            hashes[linkname] = get_file_hash(Path(linkname)/'.caf/lock')
        return hashes
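# Illustrative only: for a task with one stored input file, one plain relative
# symlink and one child linked as 'relax', get_hashes() returns a mapping like
# (names hypothetical, hashes shortened):
#
#     {'input.txt': 'a94a8fe5cc...',     # content hash of a stored file
#      'data': '../shared/data',         # plain symlinks hash as their target
#      'relax': '7b52009b64...'}         # children hash as their .caf/lock
#
# so a task's identity changes whenever any input file, link target or child
# lock changes.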
def check_caflib(path, src, env):
    try:
        info('Loading hook "{}"'.format(path))
        module = imp.new_module(path.stem)
        exec(compile(path.open().read(), path.name, 'exec'), module.__dict__)
    except Exception:
        import traceback
        traceback.print_exc()
        error('There was an error while reading hook "{}"'.format(path))
    imports = [inspect.getmodule(obj) for _, obj in inspect.getmembers(module)]
    imports = set(Path(i.__file__) for i in imports if i)
    imports = [i for i in imports if 'caflib' in i.parts]
    files = []
    for i in imports:
        if i.name != '__init__.py':
            files.append(i)
        else:
            _reported.append((
                warn, 'Hook "{}" is loading the whole caflib'.format(path)))
            files.extend(i.parent.glob('**/*.py'))
    files = sorted(set(files))
    if files:
        env['PYTHONPATH'].append(caflib_path)
        for file in files:
            relpath = '/'.join(dropwhile(lambda x: x != 'caflib', file.parts))
            with (Path(caflib_path)/relpath).open() as f:
                h = md5(f.read().encode()).hexdigest()
            src = '{}\n# md5 {}: {}'.format(src, relpath, h)
    return src
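# Illustrative result (module path and hash hypothetical): a hook importing
# caflib.Utils has fingerprint lines appended to its returned source, e.g.
#
#     # md5 caflib/Utils.py: 5d41402abc4b...
#
# so the hash of a task using the hook changes whenever the library code the
# hook depends on changes.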
def init(caf):
    """
    Initialize the Caf repository.

    Usage:
        caf init

    By default, the repository is created in .caf/db. If 'cache' is defined
    in ~/.config/caf/conf.yaml, the repository is created there and symlinked
    to .caf/db; otherwise it is created locally.
    """
    if 'cache' in caf.conf:
        timestamp = get_timestamp()
        cache_path = Path(caf.conf['cache'])/'{}_{}'.format(
            Path().resolve().name, timestamp)
        mkdir(cache_path)
        relink(cache_path, caf.cache, relative=False)
    else:
        cache_path = caf.cache
        if cache_path.exists():
            error('{} exists, cannot overwrite'.format(cache_path))
        mkdir(cache_path)
    info('Initializing an empty repository at {}.'.format(cache_path))
    mkdir(caf.cellar)
    mkdir(caf.brewery)
    with open('.gitignore', 'w') as f:
        f.write('\n'.join(['.caf']))
    with open(os.devnull, 'w') as null:
        sp.call(['git', 'init'], stdout=null)
        sp.call(['git', 'add', 'caf', 'cscript.py', '.gitignore'], stdout=null)
        sp.call(['git', 'commit', '-m', 'initial commit'], stdout=null)
def work(caf, profile: '--profile', n: ('-j', int), targets: 'TARGET',
         limit: ('--limit', int), queue: '--queue', myid: '--id',
         dry: '--dry', do_init: 'init', do_build: 'build',
         verbose: '--verbose', last_queue: '--last',
         maxdepth: ('--maxdepth', int)):
    """
    Execute all prepared build tasks.

    Usage:
        caf [[init] build] work [-v] [--limit N]
            [--profile PROFILE [-j N] | [--id ID] [--dry]]
            [--last | --queue URL | [TARGET...] [--maxdepth N]]

    Options:
        -n, --dry                Dry run (do not write to disk).
        --id ID                  ID of worker [default: 1].
        -p, --profile PROFILE    Run worker via ~/.config/caf/worker_PROFILE.
        -q, --queue URL          Take tasks from web queue.
        --last                   As above, but use the last submitted queue.
        -j N                     Number of launched workers [default: 1].
        -l, --limit N            Limit number of tasks to N.
        -v, --verbose            Be more verbose.
        --maxdepth N             Maximum depth.
    """
    import subprocess
    if do_init:
        build(['caf', 'init', 'build'], caf)
    elif do_build:
        build(['caf', 'build'], caf)
    if profile:
        for _ in range(n):
            cmd = ['{}/.config/caf/worker_{}'
                   .format(os.environ['HOME'], profile),
                   '-v' if verbose else None,
                   ('--limit', limit),
                   ('--queue', queue),
                   targets,
                   ('--maxdepth', maxdepth)]
            try:
                subprocess.check_call(filter_cmd(cmd))
            except subprocess.CalledProcessError:
                error('Running ~/.config/caf/worker_{} did not succeed.'
                      .format(profile))
    else:
        if queue or last_queue:
            if last_queue:
                with open('.caf/LAST_QUEUE') as f:
                    queue = f.read().strip()
            url = caf.get_queue_url(queue, 'get') or queue
            worker = QueueWorker(myid, caf.cache, url,
                                 dry=dry, limit=limit, debug=verbose)
        else:
            roots = [caf.out/t for t in targets] \
                if targets else caf.out.glob('*')
            tasks = OrderedDict()
            for path in find_tasks(*roots, unsealed=True, maxdepth=maxdepth):
                cellarid = get_stored(path)
                if cellarid not in tasks:
                    tasks[cellarid] = str(path)
            worker = LocalWorker(myid, caf.cache,
                                 list(reversed(tasks.items())),
                                 dry=dry, limit=limit, debug=verbose)
        worker.work()
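# Example invocations (illustrative; the profile name is hypothetical):
#
#     caf init build work -v --limit 10   # build, then run 10 tasks locally
#     caf work --last                     # consume the last submitted queue
#     caf work -p myprofile -j 4          # launch 4 workers via the profile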
def __init__(self, cellar, top, libpath):
    try:
        self.cellar = cellar.resolve()
    except FileNotFoundError:
        error('Cellar does not exist, maybe `caf init` first?')
    self.top = top
    self.tasks = []
    self.targets = defaultdict(dict)
    self.libpath = libpath
def get_file_hash(path):
    """Return hashed contents of a file."""
    h = hashlib.new(hashf)
    try:
        with path.open('rb') as f:
            h.update(f.read())
    except FileNotFoundError:
        error('File "{}" does not exist'.format(path))
    return h.hexdigest()
def proc_remote(self, remotes):
    if remotes == 'all':
        remotes = self.remotes.values()
    else:
        try:
            remotes = [self.remotes[r] for r in remotes.split(',')]
        except KeyError as e:
            error('Remote "{}" is not defined'.format(e.args[0]))
    return remotes
def add_to_target(self, task, target, *link, check=True):
    linkname = slugify(list(link)) if link else None
    try:
        if check and linkname in self.targets[target]:
            error('Link "{}" already in target "{}"'.format(linkname, target))
    except TypeError:
        error('Target must be a string, not {}'.format(type(target).__name__))
    self.targets[target][linkname] = task
    task.targets.append((target, linkname))
    return task
def process_features(self, features, attrib=None):
    with timing('features'):
        for name, feat in list(features.items()):
            if not attrib or attrib in getattr(feat, 'feature_attribs', []):
                with timing(name):
                    try:
                        feat(self)
                    except PermissionError as e:
                        error('Feature "{}" tried to change stored file "{}"'
                              .format(name, e.filename))
                del features[name]
def link_deps(self):
    with cd(self.path):
        for linkname, link in self.links.items():
            relink(os.path.relpath(str(link.task.path)), linkname)
        for filename, path in self.files.items():
            try:
                relink(os.path.relpath(str(path)), filename)
            except FileExistsError:
                if 'RELINK' in os.environ:
                    Path(filename).unlink()
                    relink(os.path.relpath(str(path)), filename)
                else:
                    error('Something replaced a linked file "{}" '
                          'with a real file in {}'.format(filename, self))
def build(self, batch):
    try:
        batch = batch.resolve()
    except FileNotFoundError:
        error('Batch does not exist, maybe `caf build new` first?')
    with timing('task sorting'):
        self.sort_tasks()
    ntskdigit = ceil(log10(len(self.tasks)+1))
    with ProgressBar(maxval=len(self.tasks), redirect_stdout=True) as progress:
        for i, task in enumerate(self.tasks):
            task.build(batch/'{:0{n}d}'.format(i, n=ntskdigit))
            progress.update(i)
    for report in _reports:
        report()
def sort_tasks(self):
    """Sort tasks such that children precede parents (topological sort)."""
    queue = []
    tops = [task for task in self.tasks if not task.parents]
    while tops:
        node = tops.pop()
        queue.insert(0, node)
        for child in node.children:
            child._parent_counter += 1
            # enqueue a child once all of its parents have been placed
            if child._parent_counter == len(child.parents):
                tops.append(child)
    in_cycle = [task for task in self.tasks
                if task._parent_counter != len(task.parents)]
    if in_cycle:
        error('There are cycles in the dependency tree')
    self.tasks = queue
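# A self-contained sketch of the same topological idea on a toy graph (names
# hypothetical, not part of the original source):
#
#     parents = {'a': [], 'b': ['a'], 'c': ['a', 'b']}   # b, c depend on a
#     children = {'a': ['b', 'c'], 'b': ['c'], 'c': []}
#     counter = {n: 0 for n in parents}
#     queue, tops = [], [n for n in parents if not parents[n]]
#     while tops:
#         node = tops.pop()
#         queue.insert(0, node)
#         for child in children[node]:
#             counter[child] += 1
#             if counter[child] == len(parents[child]):
#                 tops.append(child)
#     # queue == ['c', 'b', 'a']: children precede parents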
def add_dependency(self, task, *links, needed=False, escape=True):
    if not links:
        link = None
    else:
        link, links = links[0], links[1:]
    if self == task:
        error('Task cannot depend on itself: {}'.format(self))
    self.children.append(task)
    if link is not None:
        linkname = slugify(link) if escape else link
    else:
        self.noname_link_counter += 1
        linkname = '_{}'.format(self.noname_link_counter)
    self.links[linkname] = Task.Link(task, links, needed)
    task.parents.append((self, linkname))
    return self
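# Hedged usage sketch (the task objects and link names are hypothetical):
#
#     parent.add_dependency(child, 'relax')   # linked under the name 'relax'
#     parent.add_dependency(child)            # auto-named '_1', '_2', ...
#
# Positional arguments after the link name are stored on the Link and are
# later materialized as symlinks into the child by Task.prepare().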
def __init__(self, path_or_stringio):
    try:
        text = path_or_stringio.getvalue()
    except AttributeError:
        self.key = path = Path(path_or_stringio)
        self.name = path.name
        if self.key not in Template._cache:
            try:
                Template._cache[self.key] = path.open().read()
                info('Loading template "{}"'.format(path))
            except FileNotFoundError:
                error('Template "{}" does not exist'.format(path))
    else:
        self.key = sha1(text.encode()).hexdigest()
        self.name = self.key[-7:]
        if self.key not in Template._cache:
            Template._cache[self.key] = text
            info('Loading anonymous template')
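# Illustrative usage (paths hypothetical): file templates are cached under
# their path, in-memory templates under the SHA1 of their text, so repeated
# construction never re-reads the same source:
#
#     t1 = Template('templates/input.in')      # keyed by path
#     t2 = Template(io.StringIO('x = {x}'))    # keyed by sha1; named by the
#                                              # last 7 hex digits of the key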
def process_hook(path):
    path = Path(path)
    if not path.is_file():
        error('Hook "{}" does not exist'.format(path))
    if path in cache:
        return cache[path]
    filetype = filetypes.get(path.suffix)
    if not filetype:
        error('Unknown hook file type: {}'.format(path))
    with path.open() as f:
        src = f.read()
    env = defaultdict(list)
    for dependency in dependencies[filetype]:
        src = dependency(path, src, env)
    if filetype == 'python':
        if 'PYTHONPATH' in env:
            env['PYTHONPATH'].append('$PYTHONPATH')
        cmd = 'python3 {}'.format(path)
    cache[path] = (src, cmd, env)
    return cache[path]
def replacer(m):
    token = m.group(1)
    if ':' in token:
        token, fmt = token.split(':', 1)
    else:
        fmt = None
    if '=' in token:
        token, default = token.split('=', 1)
    else:
        default = None
    try_parse = re.match(r'(\w+)\[([\d-]+)\]', token)
    if try_parse:
        token, idx = try_parse.groups()
        idx = int(idx)
    else:
        idx = None
    if token in mapping:
        value = mapping[token]
        if idx is not None:
            value = value[idx]
    elif default is not None:
        try:
            value = eval(default)  # defaults are Python expressions
        except Exception:
            error('There was an error when processing default of key "{}" '
                  'in template "{}"'.format(token, self.name))
    else:
        error('"{}" not defined when processing template "{}"'
              .format(token, self.name))
    used.add(token)
    try:
        return format(value, fmt) if fmt else str(value)
    except ValueError:
        error('Unknown format "{}" when processing key "{}" in template "{}"'
              .format(fmt, token, self.name))
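# Sketch of the token syntax handled above (keys and values hypothetical):
# given mapping = {'n': 5, 'xs': [10, 20]}, substitution behaves as
#
#     '{n}'      -> '5'
#     '{n:03d}'  -> '005'    # ':' introduces a format spec
#     '{m=2*3}'  -> '6'      # '=' introduces an eval'd default expression
#     '{xs[1]}'  -> '20'     # '[i]' indexes into the value
#
# and an undefined key without a default is a hard error.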
def build(caf, dry: '--dry', do_init: 'init'):
    """
    Prepare tasks and targets defined in cscript.

    Usage:
        caf [init] build [--dry]

    Options:
        -n, --dry                Dry run (do not write to disk).

    Tasks are created in .caf/db/Brewery/Latest and, if their preparation
    does not depend on unfinished tasks, they are prepared and stored in
    .caf/db/Cellar based on their SHA1 hash. Targets (collections of
    symlinks to tasks) are created in ./build.
    """
    if not hasattr(caf.cscript, 'build'):
        error('cscript has to contain function build(ctx)')
    if do_init:
        init(['caf', 'init'], caf)
    ctx = Context(caf.cache/cellar, caf.top, caf.libpath)
    with timing('dependency tree'):
        caf.cscript.build(ctx)
    if not dry:
        timestamp = get_timestamp()
        mkdir(caf.brewery/timestamp)
        relink(timestamp, caf.brewery/latest, relative=False)
        with timing('build'):
            ctx.build(caf.brewery/latest)
        if caf.out.is_dir():
            shutil.rmtree(str(caf.out))
        mkdir(caf.out)
        with timing('targets'):
            ctx.make_targets(caf.out, caf.cache)
        if hasattr(caf.cscript, 'json'):
            warn('Make sure json is not printing dictionaries in features')
        with open(os.devnull, 'w') as null:
            sp.call(['git', 'add', '--all', 'build'], stdout=null)
            sp.call(['git', 'commit', '-a', '-m', '#build'], stdout=null)
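# A minimal cscript sketch of what `caf build` expects; only build(ctx) and
# the ./build output directory are confirmed by this module, and the task
# constructor below (ctx.add_task) is a hypothetical placeholder:
#
#     def build(ctx):
#         task = ctx.add_task(command='echo hello > out')  # hypothetical API
#         ctx.add_to_target(task, 'greeting', 'hello')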
def __init__(self, libpath):
    super().__init__('caf')
    self.conf = Configuration('.caf/conf.yaml')
    self.conf.set_global(Configuration('{}/.config/caf/conf.yaml'
                                       .format(os.environ['HOME'])))
    for cscriptname in ['cscript', 'cscript.py']:
        if Path(cscriptname).is_file():
            break
    else:
        cscriptname = None
    with timing('reading cscript'):
        try:
            self.cscript = load_module(
                cscriptname, self.commands[('unpack',)]._func) \
                if cscriptname else object()
        except RuntimeError:
            error('There was an error while reading cscript.')
    self.out = Path(getattr(self.cscript, 'out', 'build'))
    self.cache = Path(getattr(self.cscript, 'cache', '.caf/db'))
    self.top = Path(getattr(self.cscript, 'top', '.'))
    self.cellar = self.cache/cellar
    self.brewery = self.cache/brewery
    self.remotes = {name: Remote(r['host'], r['path'], self.top)
                    for name, r in self.conf.get('remotes', {}).items()}
    self.libpath = libpath
def prepare(self):
    """Prepare a task.

    Pull in files and templates, link in files from children, execute
    features and save the command. Check that all attributes have been
    consumed.
    """
    try:
        features = OrderedDict(
            (feat, _features[feat]) if isinstance(feat, str)
            else (feat.__name__, feat)
            for feat in listify(self.consume('features')))
    except KeyError as e:
        error('Feature {} is not registered'.format(e.args[0]))
    self.process_features(features, 'before_files')
    with cd(self.ctx.top):
        with timing('files'):
            for filename in listify(self.consume('files')):
                if isinstance(filename, tuple):
                    self.store_link_file(filename[0], filename[1])
                else:
                    if isinstance(filename, str) \
                            and ('*' in filename or '?' in filename):
                        for member in glob(filename):
                            self.store_link_file(member)
                    else:
                        self.store_link_file(filename)
        with timing('hooks'):
            hooks = {filename: process_hook(filename)
                     for filename in listify(self.consume('hooks'))}
        with timing('templates'):
            templates = {}
            for filename in listify(self.consume('templates')):
                if isinstance(filename, tuple):
                    source, target = filename
                elif isinstance(filename, str):
                    source = target = filename
                else:
                    error("Don't know how to store {!r}".format(filename))
                templates[target] = Template(source)
    with cd(self.path):
        self.process_features(features, 'before_templates')
        with timing('templates'):
            for target, template in templates.items():
                processed, used = template.substitute(self.attrs)
                self.store_link_text(processed, target, template.name)
                for attr in used:
                    self.consume(attr)
        with timing('linking'):
            for linkname, link in self.links.items():
                for symlink in link.links:
                    if isinstance(symlink, tuple):
                        target, symlink = symlink
                    else:
                        target = symlink
                    relink('{}/{}'.format(linkname, target), symlink)
        self.process_features(features)
        commands = []
        env = defaultdict(list)
        for var, val in (self.consume('_env') or {}).items():
            env[var].append(str(val))
        for hook_path, (hook_src, hook_cmd, hook_env) in hooks.items():
            commands.append(hook_cmd)
            for var, vals in hook_env.items():
                env[var].extend(vals)
            self.store_link_text(hook_src, hook_path, label=True)
        command = self.consume('command')
        if command:
            commands.append(command)
        if commands:
            with open('command', 'w') as f:
                f.write('\n'.join(commands))
        if env:
            with open('.caf/env', 'w') as f:
                for var, vals in env.items():
                    f.write('export {}={}\n'
                            .format(var, ':'.join(map(str, vals))))
    if self.attrs:
        error('Task {} has non-consumed attributes: {}'
              .format(self, list(self.attrs)))
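# Illustrative sketch of the attributes consumed above (all values are
# hypothetical; the keys match what prepare() consumes):
#
#     task.attrs = {
#         'features': ['my_feature'],       # strings looked up in _features
#         'files': ['geom.xyz', ('src.in', 'dst.in'), '*.dat'],
#         'templates': ['input.in'],        # substituted with task.attrs
#         'hooks': ['run.py'],              # each adds a line to ./command
#         'command': './run.sh',
#         '_env': {'OMP_NUM_THREADS': 1},   # exported via .caf/env
#     }
#
# Any key left unconsumed after preparation is reported as an error.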