Example #1
def main(*args):
    if len(args) >= 1:
        subcmd, *args = args
        if subcmd == 'mirror':
            return mirror(*args)
    log.error('usage: ')
    log.error('    fingertip saviour mirror <config-file> [<what-to-mirror>]')
    raise SystemExit(1)
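
For illustration, the dispatcher unpacks the first positional argument as the subcommand and forwards the rest; the invocations below are made up:

main('mirror', 'saviour.yml')             # -> mirror('saviour.yml')
main('mirror', 'saviour.yml', 'alpine*')  # -> mirror('saviour.yml', 'alpine*')
main()                                    # logs the usage text and exits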
Example #2
def is_supported(dirpath):
    # Probe for reflink support by asking GNU cp to clone a scratch file.
    tmp = temp.disappearing_file(dstdir=dirpath)
    r = subprocess.Popen(['cp', '--reflink=always', tmp, tmp + '-reflink'],
                         stderr=subprocess.PIPE)
    _, err = r.communicate()
    r.wait()
    temp.remove(tmp, tmp + '-reflink')
    # cp emits both messages when the filesystem genuinely lacks reflinks;
    # any other failure mode is inconclusive and worth flagging.
    sure_not = b'failed to clone' in err and b'Operation not supported' in err
    if r.returncode and not sure_not:
        log.error('reflink support detection was inconclusive; '
                  'the cache directory may have problems')
    return r.returncode == 0
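
For context, a self-contained approximation of the same probe using only the standard library (hypothetical; fingertip's own temp helpers handle scratch-file cleanup more carefully):

import os
import subprocess
import tempfile

def reflink_supported(dirpath):
    # create a scratch file and try to reflink-clone it with GNU cp
    fd, src = tempfile.mkstemp(dir=dirpath)
    os.close(fd)
    dst = src + '-reflink'
    try:
        r = subprocess.run(['cp', '--reflink=always', src, dst],
                           stderr=subprocess.PIPE)
        return r.returncode == 0  # 0 only if the clone succeeded
    finally:
        for p in (src, dst):
            if os.path.exists(p):
                os.unlink(p)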
Example #3
def c_r_offline(self, request):
    # Offline mode: serve from the local cache only; returns False
    # on a cache miss or an unreadable cache entry.
    cache_url = self.cache_url(request.url)
    log.debug(f'looking up {cache_url} in the cache')
    cache_data = self.cache.get(cache_url)
    if cache_data is None:
        log.error(f'{cache_url} not in cache and fingertip is offline')
        return False
    resp = self.serializer.loads(request, cache_data)
    if not resp:
        log.error(f'{cache_url} cache entry deserialization failed, ignored')
        return False
    log.warning(f'Using {cache_url} from offline cache')
    return resp
Example #4
def main(what=None):
    if what in ('setup', 'unmount', 'cleanup'):
        return globals()[what]()
    log.error('usage: ')
    log.error('    fingertip filesystem setup')
    log.error('    fingertip filesystem unmount')
    log.error('    fingertip filesystem cleanup')
    raise SystemExit(1)
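
The globals()[what]() line looks up a module-level function by name and calls it. A minimal standalone sketch of the pattern (the function bodies are illustrative):

def setup():
    return 'set up'

def unmount():
    return 'unmounted'

def main(what=None):
    if what in ('setup', 'unmount'):
        return globals()[what]()  # dispatch by name
    raise SystemExit(1)

assert main('setup') == 'set up'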
Example #5
def storage_setup_wizard():
    assert SETUP in ('auto', 'suggest', 'never')
    if SETUP == 'never':
        return
    size = SIZE
    os.makedirs(path.MACHINES, exist_ok=True)
    if not is_supported(path.MACHINES):
        log.warning(f'images directory {path.MACHINES} lacks reflink support')
        log.warning('without it, fingertip will thrash and fill up your SSD '
                    'in no time')
        backing_file = os.path.join(path.CACHE, 'for-machines.xfs')
        if not os.path.exists(backing_file):
            if SETUP == 'suggest':
                log.info(f'would you like to allow fingertip '
                         f'to allocate {size} at {backing_file} '
                         'for a reflink-enabled XFS loop mount?')
                log.info('(set FINGERTIP_SETUP="auto" environment variable'
                         ' to do it automatically)')
                i = input(f'[{size}]/different size/cancel/ignore> ').strip()
                if i == 'cancel':
                    log.error('cancelled')
                    sys.exit(1)
                elif i == 'ignore':
                    return
                size = i or size
            tmp = temp.disappearing_file(path.CACHE)
            create_supported_fs(tmp, size)
            os.rename(tmp, backing_file)

        log.info(f'fingertip will now mount the XFS image at {backing_file}')
        if SETUP == 'suggest':
            i = input('[ok]/skip/cancel> ').strip()
            if i == 'skip':
                log.warning('skipping; '
                            'fingertip will have no reflink superpowers')
                log.warning('tell your SSD I\'m sorry')
                return
            elif i and i != 'ok':
                log.error('cancelled')
                sys.exit(1)

        mount_supported_fs(backing_file, path.MACHINES)
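
create_supported_fs and mount_supported_fs are defined elsewhere; a plausible sketch of what they do, inferred from the surrounding messages (XFS image, loop mount, reflink support) — the exact flags and privilege handling are assumptions:

import subprocess

def create_supported_fs(backing_file, size):
    # preallocate the image, then format it as reflink-enabled XFS
    subprocess.run(['fallocate', '-l', size, backing_file], check=True)
    subprocess.run(['mkfs.xfs', '-m', 'reflink=1', '-q', backing_file],
                   check=True)

def mount_supported_fs(backing_file, mountpoint):
    # loop-mount the image over the machines directory
    subprocess.run(['sudo', 'mount', '-o', 'loop', backing_file, mountpoint],
                   check=True)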
Example #6
def main(*args):
    if len(args) >= 1:
        subcmd, *args = args
        if subcmd == 'mirror':
            return mirror(*args)
        if subcmd == 'deduplicate' and not args:
            return deduplicate(log.Sublogger('deduplicate'))
    log.error('usage: ')
    log.error('    fingertip saviour mirror <config-file> [<what-to-mirror>]')
    log.error('    fingertip saviour deduplicate')
    raise SystemExit(1)
Example #7
def main(what=None, older_than=0):
    if OFFLINE:
        log.error('FINGERTIP_OFFLINE set to 1, no cleanup')
        return
    if what == 'everything':
        return everything()
    elif what == 'periodic':
        return periodic()
    elif what in ('downloads', 'logs', 'machines'):
        return globals()[what](older_than)
    log.error('usage: ')
    log.error('    fingertip cleanup downloads [<older-than>]')
    log.error('    fingertip cleanup logs [<older-than>]')
    log.error('    fingertip cleanup machines [<expired-for>|all]')
    log.error('    fingertip cleanup everything')
    log.error('    fingertip cleanup periodic')
    raise SystemExit(1)
Example #8
def main(what=None, *args, **kwargs):
    if what == 'everything':
        return everything()
    elif what == 'periodic':
        return periodic()
    elif what in ('downloads', 'logs', 'machines', 'tempfiles'):
        return globals()[what](*args, **kwargs)
    log.error('usage: ')
    log.error('    fingertip cleanup downloads [<older-than>]')
    log.error('    fingertip cleanup logs [<older-than>]')
    log.error('    fingertip cleanup machines [<expired-for>|all]')
    log.error('    fingertip cleanup tempfiles [<older-than> [<location>]]')
    log.error('    fingertip cleanup everything')
    log.error('    fingertip cleanup periodic')
    raise SystemExit(1)
Example #9
def mirror(config, *what_to_mirror, deduplicate=None):
    total_failures = []
    failures = collections.defaultdict(list)

    with open(config) as f:
        config = ruamel.yaml.YAML(typ='safe').load(f)
    if 'mirror' in config and not config['mirror']:
        log.warning('mirroring is disabled in config')
        return

    hows, whats = config['how'], config['what']
    if not what_to_mirror:
        what_to_mirror = whats.keys()
    else:
        what_to_mirror = ([
            k for k in whats.keys() if any(
                fnmatch.fnmatch(k, req) for req in what_to_mirror)
        ] + [k for k in what_to_mirror if '=' in k])

    if not what_to_mirror:
        log.error('nothing to mirror')
        return

    for resource in what_to_mirror:
        log.debug(f'processing {resource}...')

        if '=' not in resource:  # example: alpine-3.13
            resource_name, tail = resource, ''
            s = whats[resource_name]
        else:  # example: alpine-3.13=alpine/v3.13/main/x86
            resource_name, s = resource.split('=', 1)
            # FIXME UGLY: config overrides are stronger than = (more syntax?)
            # TODO: whats shouldn't be a dict, I think, just a list of strings
            if resource_name in whats:
                s = whats[resource_name]

        if s is None:
            s = resource_name
        if '/' in s:
            how_name, suffix = s.split('/', 1)
            suffix = '/' + suffix
        else:
            how_name, suffix = s, ''

        try:
            how = hows[how_name]
        except KeyError:
            log.error(f'missing how section on {how_name}')
            raise SystemExit(1)

        url = how['url'] + suffix
        method = how['method']
        sources = (how['sources'] if 'sources' in how else [how['url']])
        sources = [s + suffix for s in sources]
        extra_args = {
            k: v
            for k, v in how.items()
            if k not in ('url', 'sources', 'method', 'validate', 'deduplicate')
        }

        if f'method_{method}' not in globals():
            log.error(f'unsupported method {method}')
            raise SystemExit(1)

        meth = globals()[f'method_{method}']
        symlink = path.saviour(url.rstrip('/'))
        # usually symlink points to data, but while we're working on it,
        # it temporarily points to a consistent snapshot of it named `snap`
        data = os.path.realpath(path.saviour('_', resource_name, 'data'))
        snap = os.path.realpath(path.saviour('_', resource_name, 'snap'))
        temp = os.path.realpath(path.saviour('_', resource_name, 'temp'))
        lockfile = path.saviour('_', resource_name) + '-lock'
        assert data.startswith(os.path.realpath(path.SAVIOUR))
        assert snap.startswith(os.path.realpath(path.SAVIOUR))
        assert temp.startswith(os.path.realpath(path.SAVIOUR))

        sublog = log.Sublogger(f'{method} {resource_name}')
        sublog.info('locking...')
        with lock.Lock(lockfile):
            os.makedirs(os.path.dirname(snap), exist_ok=True)

            if os.path.exists(temp):
                sublog.info('removing stale temp...')
                _remove(temp)
            if os.path.exists(symlink):  # it's already published
                if os.path.exists(data) and not os.path.exists(snap):
                    # `data` is present and is the best we have to publish
                    sublog.info('snapshotting...')
                    reflink.always(data, temp, preserve=True)
                    os.rename(temp, snap)
                if os.path.exists(snap):
                    # link to a consistent snapshot while we work on `data`
                    _symlink(snap, symlink)

            for source in sources:
                sublog.info(f'trying {source}...')
                try:
                    meth(sublog, source, snap, data, **extra_args)
                    assert os.path.exists(data)
                    if 'validate' in how:
                        sublog.info(f'validating with {how["validate"]}...')
                        validator = globals()[f'validate_{how["validate"]}']
                        validator(sublog, source, data)
                        sublog.info('validated')
                    break
                except Exception:
                    traceback.print_exc()
                    failures[resource_name].append(source)
                    fingertip.util.log.warning(f'failed to mirror {source}')

            if len(failures[resource_name]) == len(sources):
                sublog.error(f'failed to mirror '
                             f'from all {len(sources)} sources')
                total_failures.append(resource_name)
                continue

            _symlink(data, symlink)
            if os.path.exists(snap):
                os.rename(snap, temp)  # move it out of the way ASAP
                sublog.info('removing now obsolete snapshot...')
                _remove(temp)

        how_deduplicate = how.get('deduplicate', True)
        db_name = how_deduplicate if how_deduplicate is not True else how_name
        if how_deduplicate and deduplicate is not False:
            try:
                _deduplicate(sublog, db_name, resource_name, timeout=1)
            except lock.LockTimeout:
                log.warning(f'skipped deduplication of {resource_name}, '
                            f'db {db_name} was locked')
    if total_failures:
        fingertip.util.log.error(f'failed: {", ".join(total_failures)}')
        raise FailureToMirrorError(", ".join(total_failures))
    log.info('saviour has completed mirroring')
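
For reference, the parsed config this function consumes has roughly the following shape; the keys ('how', 'what', 'url', 'method', 'sources', 'validate', 'deduplicate') come straight from the code above, while the concrete values are invented:

config = {
    'how': {
        'alpine': {
            'method': 'rsync',  # dispatched as globals()['method_rsync']
            'url': 'rsync://mirror.example.org/alpine',
            'sources': ['rsync://mirror-a.example.org/alpine',
                        'rsync://mirror-b.example.org/alpine'],
            # optional: 'validate': ..., 'deduplicate': ...;
            # any other key is passed to the method as an extra argument
        },
    },
    'what': {
        'alpine-3.13': 'alpine/v3.13/main/x86',  # <how-name>/<suffix>
    },
}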
Example #10
    def __init__(self, url, *path_components, enough_to_have=None):
        if not path_components:
            path_components = [url.replace('/', '::')]
        self.url = url
        cache_path = path.downloads('git', *path_components, makedirs=True)
        self.path = temp.disappearing_dir(os.path.dirname(cache_path),
                                          path_components[-1])
        lock_working_copy_path = self.path + '-lock'
        lock_cache_path = cache_path + '-lock'
        lock.Lock.__init__(self, lock_working_copy_path)
        sources = saviour_sources()
        self.self_destruct = False
        with lock.Lock(lock_cache_path), lock.Lock(lock_working_copy_path):
            cache_is_enough = False
            if os.path.exists(cache_path):
                try:
                    cr = git.Repo(cache_path)
                    cache_is_enough = (enough_to_have
                                       and _has_rev(cr, enough_to_have))
                except git.GitError as e:
                    log.error(f'something wrong with git cache {cache_path}')
                    log.error(str(e))
                _remove(self.path)

            for i, (source, cache) in enumerate(sources):
                last_source = i == len(sources) - 1

                if cache and cache_is_enough:
                    log.info(f'not re-fetching {url} from {source} '
                             f'because {enough_to_have} '
                             'is already present in cache')
                    git.Repo.clone_from(cache_path, self.path, mirror=True)
                    break

                if source == 'local':
                    surl = path.saviour(url).replace('//', '/')  # workaround: collapse double slashes
                    if not os.path.exists(surl) and not last_source:
                        continue
                    log.info(f'cloning {url} from local saviour mirror')
                    git.Repo.clone_from(surl, self.path, mirror=True)
                    break
                elif source == 'direct':
                    surl = url
                else:
                    surl = source + '/' + url
                    surl = 'http://' + surl if '://' not in source else surl

                log.info(f'cloning {url} from {source} '
                         f'cache_exists={os.path.exists(cache_path)}...')
                try:
                    # TODO: bare clone
                    # no harm in referencing cache, even w/o cached+
                    git.Repo.clone_from(surl,
                                        self.path,
                                        mirror=True,
                                        dissociate=True,
                                        reference_if_able=cache_path)
                except git.GitError:
                    log.warning(f'could not clone {url} from {source}')
                    if last_source:
                        raise
                    continue
                break

            _remove(cache_path)
            reflink.auto(self.path, cache_path)
            git.Repo.__init__(self, self.path)
            self.remotes[0].set_url(url)
        self.self_destruct = True
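
saviour_sources() is not shown here; judging by how the loop consumes it, it returns (source, cache) pairs, where source is 'local', 'direct', or a mirror host, and cache says whether the local git cache may satisfy the fetch. An invented example:

sources = [
    ('local', True),               # local saviour mirror
    ('mirror.example.org', True),  # remote mirror; 'http://' is prepended
    ('direct', False),             # fall back to the upstream URL itself
]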
Example #11
            def _serve_http(self,
                            uri,
                            headers,
                            meth='GET',
                            cache=True,
                            retries=RETRIES_MAX):
                sess = http_cache._get_requests_session(direct=not cache)
                sess_dir = http_cache._get_requests_session(direct=True)
                basename = os.path.basename(uri)

                headers = {
                    k: v
                    for k, v in headers.items()
                    if not (k in STRIP_HEADERS or k.startswith('Proxy-'))
                }
                headers['Accept-Encoding'] = 'identity'
                log.debug(f'{meth} {basename} ({uri})')
                for k, v in headers.items():
                    log.debug(f'{k}: {v}')

                error = None
                try:
                    if meth == 'GET':
                        # direct streaming or trickery might be required...
                        preview = sess.head(uri,
                                            headers=headers,
                                            allow_redirects=False)
                        if (300 <= preview.status_code < 400
                                and 'Location' in preview.headers):
                            nu = preview.headers['Location']
                            if nu.startswith('https://'):
                                # no point in serving that, we have to pretend
                                # that never happened
                                log.debug(f'suppressing HTTPS redirect {nu}')
                                return self._serve_http(nu,
                                                        headers,
                                                        meth=meth,
                                                        cache=cache,
                                                        retries=retries)
                        direct = []
                        if not cache:
                            direct.append('caching disabled for this source')
                        if int(preview.headers.get('Content-Length', 0)) > BIG:
                            direct.append(f'file bigger than {BIG}')
                        if 'Range' in headers:
                            # There seems to be a bug in CacheControl
                            # that serves contents in full if a range request
                            # hits a non-ranged cached entry.
                            direct.append('ranged request, playing safe')
                        if direct:
                            # Don't cache, don't reencode, stream it as is
                            log.debug(f'streaming {basename} directly '
                                      f'from {uri} ({", ".join(direct)})')
                            r = sess_dir.get(uri, headers=headers, stream=True)
                            self._status_and_headers(r.status_code, r.headers)
                            shutil.copyfileobj(r.raw, self.wfile)
                            return

                    # fetch with caching
                    m_func = getattr(sess, meth.lower())
                    r = m_func(uri if '://' in uri else 'http://self' + uri,
                               headers=headers,
                               allow_redirects=False)
                    data = r.content
                    if 'Content-Length' in r.headers:
                        length = int(r.headers['Content-Length'])
                        if len(data) != length:
                            data = hack_around_unpacking(uri, headers, data)
                        assert len(data) == length
                except BrokenPipeError:
                    error = f'Upwards broken pipe for {meth} {uri}'
                except ConnectionResetError:
                    error = f'Upwards connection reset for {meth} {uri}'
                except requests.exceptions.ConnectionError:
                    error = f'Upwards connection error for {meth} {uri}'
                if error:
                    # delay a re-request
                    if retries:
                        log.warning(f'{error} (will retry x{retries})')
                        t = (RETRIES_MAX - retries) / RETRIES_MAX * COOLDOWN
                        time.sleep(t)
                        return self._serve_http(uri,
                                                headers,
                                                meth=meth,
                                                cache=cache,
                                                retries=retries - 1)
                    else:
                        log.error(f'{error} (out of retries)')
                        self.send_error(http.HTTPStatus.SERVICE_UNAVAILABLE)
                        return
                log.debug(f'{meth} {basename} fetched {r.status_code} ({uri})')
                try:
                    self._status_and_headers(r.status_code, r.headers)
                    if meth == 'GET':
                        self.wfile.write(data)
                except BrokenPipeError:
                    log.warning(f'Downwards broken pipe for {meth} {uri}')
                except ConnectionResetError:
                    log.warning(f'Downwards connection reset for {meth} {uri}')
                except requests.exceptions.ConnectionError:
                    log.warning(f'Downwards connection error for {meth} {uri}')
                log.debug(f'{meth} {basename} served ({uri})')
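
The retry delay t = (RETRIES_MAX - retries) / RETRIES_MAX * COOLDOWN backs off linearly as retries are used up: the first retry sleeps 0, the last sleeps just under COOLDOWN. A worked example with assumed constants:

RETRIES_MAX, COOLDOWN = 5, 30  # assumed values; the real constants may differ
for retries in range(RETRIES_MAX, 0, -1):
    t = (RETRIES_MAX - retries) / RETRIES_MAX * COOLDOWN
    print(f'{retries} left -> sleep {t:g}s')
# 5 left -> sleep 0s, 4 -> 6s, 3 -> 12s, 2 -> 18s, 1 -> 24s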
Example #12
def mirror(config, *what_to_mirror):
    total_failures = []
    failures = collections.defaultdict(list)

    with open(config) as f:
        config = ruamel.yaml.YAML(typ='safe').load(f)
    hows, whats = config['how'], config['what']
    if not what_to_mirror:
        what_to_mirror = whats.keys()
    else:
        what_to_mirror = [k for k in whats.keys()
                          if any((fnmatch.fnmatch(k, req)
                                  for req in what_to_mirror))]

    # NB: if the glob filtering above matched nothing, this falls back
    # to mirroring everything in `whats`
    for resource_name in what_to_mirror or whats.keys():
        s = whats[resource_name]
        log.debug(f'processing {resource_name}...')

        if s is None:
            how, suffix = resource_name, ''
        elif '/' in s:
            how, suffix = s.split('/', 1)
            suffix = '/' + suffix
        else:
            how, suffix = s, ''

        try:
            how = hows[how]
        except KeyError:
            log.error(f'missing how section on {how}')
            raise SystemExit(1)

        url = how['url'] + suffix
        method = how['method']
        sources = (how['sources'] if 'sources' in how else [how['url']])
        sources = [s + suffix for s in sources]
        extra_args = {k: v for k, v in how.items()
                      if k not in ('url', 'sources', 'method')}

        if f'method_{method}' not in globals():
            log.error(f'unsupported method {method}')
            raise SystemExit(1)

        meth = globals()[f'method_{method}']
        symlink = path.saviour(url.rstrip('/'))
        # usually symlink points to data, but while we're working on it,
        # it temporarily points to a consistent snapshot of it named `snap`
        data = path.saviour('_', resource_name, 'data')
        snap = path.saviour('_', resource_name, 'snap')
        temp = path.saviour('_', resource_name, 'temp')
        lockfile = path.saviour('_', resource_name) + '-lock'
        assert data.startswith(path.SAVIOUR)
        assert snap.startswith(path.SAVIOUR)
        assert temp.startswith(path.SAVIOUR)

        sublog = log.Sublogger(f'{method} {resource_name}')
        sublog.info('locking...')
        with lock.Lock(lockfile):
            os.makedirs(os.path.dirname(snap), exist_ok=True)

            if os.path.exists(temp):
                sublog.info('removing stale temp...')
                _remove(temp)
            if os.path.exists(symlink):  # it's already published
                if os.path.exists(data) and not os.path.exists(snap):
                    # `data` is present and is the best we have to publish
                    sublog.info('snapshotting...')
                    reflink.always(data, temp, preserve=True)
                    os.rename(temp, snap)
                if os.path.exists(snap):
                    # link to a consistent snapshot while we work on `data`
                    _symlink(snap, symlink)

            for source in sources:
                sublog.info(f'trying {source}...')
                try:
                    meth(sublog, source, snap, data, **extra_args)
                    assert os.path.exists(data)
                    break
                except Exception:
                    traceback.print_exc()
                    failures[resource_name].append(source)
                    fingertip.util.log.warning(f'failed to mirror {source}')

            if len(failures[resource_name]) == len(sources):
                sublog.error(f'failed to mirror '
                             f'from all {len(sources)} sources')
                total_failures.append(resource_name)
                continue

            _symlink(data, symlink)
            if os.path.exists(snap):
                os.rename(snap, temp)  # move it out of the way ASAP
                sublog.info('removing now obsolete snapshot...')
                _remove(temp)

            try:
                deduplicate(sublog, resource_name, timeout=1)
            except lock.LockTimeout:
                log.warning('skipped deduplication, db was locked')
    if total_failures:
        fingertip.util.log.error(f'failed: {", ".join(total_failures)}')
        raise SystemExit(1)
    log.info('saviour has completed mirroring')