示例#1
0
def ingest(key):
    """Store the files uploaded with the current request as a new package.

    Every entry of ``request.files`` is written into a temporary directory
    (under its validated path), the directory is wrapped in a ``Package``
    and handed to ``archive.ingest``.

    Args:
        key: Key passed on to ``archive.ingest`` as ``key=``.

    Returns:
        ``('Archive is read-only', 500)`` when the configured archive mode
        is not ``'read-write'``; otherwise a redirect to ``/<key>/`` with
        status code 201, where ``<key>`` is the key returned by the archive.
    """
    _check_base(request)

    archive_mode = app.config['archive'].get('mode', 'read-only')
    if archive_mode != 'read-write':
        return 'Archive is read-only', 500

    chunk_size = 100 * 1024

    with TemporaryDirectory() as staging:
        # TODO: this probably breaks with very large packages
        # TODO: implement serialize(...) so this can be done over TAR
        # TODO: validate checksum
        # TODO: cleanup on failure
        for name in request.files:
            target = join(staging, valid_path(name))
            parent = dirname(target)

            if not exists(parent):
                makedirs(parent)

            upload = request.files[name]
            with open(target, mode='wb') as out:
                # Copy chunk by chunk until read() returns an empty
                # bytes object.
                for chunk in iter(lambda: upload.read(chunk_size), b''):
                    out.write(chunk)

        key = archive.ingest(Package(staging), key=key)
        # The looked-up package is not used below; the call itself is kept
        # so that any error it raises still surfaces here.
        archive.get(key)

        return redirect('/%s/' % key, code=201)
示例#2
0
    def add(self,
            fname=None,
            path=None,
            data=None,
            url=None,
            traverse=True,
            exclude='^\\..*|^_.*',
            replace=False,
            type='Resource',
            **kwargs):
        """Add a file, a directory tree, raw data or a URL reference.

        Args:
            fname: Name of a local file or directory to add.
            path: Path of the entry inside the package; defaults to the
                base name of ``fname``.
            data: Content to store under ``path``.
            url: URL to record; accepted on its own only together with
                ``path`` and ``type == 'Reference'``.
            traverse: When true and ``fname`` is a directory, the directory
                is handed to ``_add_directory`` instead of ``_write``.
            exclude: Pattern forwarded to ``_add_directory``.
            replace: Forwarded to ``_write``.
            type: Entry type forwarded to ``_write``.
            **kwargs: Extra keyword arguments forwarded to ``_write``.

        Raises:
            Exception: If the package is not open in ``'a'`` mode, if no
                usable combination of arguments was given, or if ``path``
                is ``'_package.json'`` or ``'_log'``.
        """
        if self._mode != 'a':
            raise Exception('package not writable, open in \'a\' mode')

        # Reject the call unless one of the three accepted input
        # combinations is present.
        if (not fname and not (path and data)
                and not (path and url and type == 'Reference')):
            raise Exception(
                'Specify either path to a filename or path and data)')

        if not path:
            path = basename(abspath(fname))

        if path in ('_package.json', '_log'):
            raise Exception(f'path ({path}) not allowed')

        if not (fname and traverse and isdir(fname)):
            self._write(valid_path(path),
                        iname=fname,
                        data=data,
                        url=url,
                        replace=replace,
                        type=type,
                        **kwargs)
        else:
            self._add_directory(fname, path, exclude=exclude)

        self._reload()
示例#3
0
    def exists(self, key, path=None):
        """Report whether a package, or a file inside it, exists on disk.

        The location is resolved through ``self.location()``. Only
        ``file://`` locations are checked against the local filesystem.

        Args:
            key: Package key; validated with ``valid_key``.
            path: Optional path inside the package; validated with
                ``valid_path`` when given.

        Returns:
            True if the resolved ``file://`` location exists on disk.
            False if nothing could be resolved or the location is not a
            ``file://`` URL.
        """
        file_scheme = 'file://'
        location = self.location(valid_key(key),
                                 path=valid_path(path) if path else None)

        if not location:
            return False

        if not location.startswith(file_scheme):
            # location() may hand back an unresolved reference URL (for
            # example http://...). Slicing a fixed number of characters
            # off such a URL would probe an unrelated local path, so it
            # is reported as not locally present instead.
            return False

        return exists(location[len(file_scheme):])
示例#4
0
    def ingest(self, package, key=None, copy=True):
        """Copy ``package`` into the archive under a newly generated key.

        Args:
            package: Source package. It is iterated for its paths and
                queried through ``get``, ``get_raw`` and ``description``.
            key: Optional key suggestion; validated with ``valid_key`` and
                passed to ``_generate_key`` as ``suggest``.
            copy: When true, files of type ``'Reference'`` are copied as
                well and recorded as ``'Resource'`` without their ``url``.

        Returns:
            The key the package was stored under.

        Raises:
            Exception: Anything raised while copying, writing the
                description or validating is re-raised after the newly
                created package directory has been removed.
        """
        self._check_mode()
        key = self._generate_key(suggest=valid_key(key) if key else None)

        def localise(entry):
            # Work on a copy so the source package description is not
            # modified.
            entry = deepcopy(entry)
            entry['@id'] = entry['path']

            if copy and entry['@type'] == 'Reference':
                entry['@type'] = 'Resource'
                entry.pop('url', None)

            return entry

        with self.lockm.get(key):
            target = self._directory(key)
            makedirs(target + sep)

            try:
                for path in package:
                    is_reference = package.get(path)['@type'] == 'Reference'

                    if copy or not is_reference:
                        self._copy(package.get_raw(path),
                                   join(target, valid_path(path)))

                with open(join(target, '_package.json'), mode='w') as out:
                    description = deepcopy(package.description())
                    description['@id'] = ''
                    description['files'] = [
                        localise(entry) for entry in description['files']
                    ]
                    out.write(dumps(description, indent=2))

                # TODO copy meta-files too

                base = self.base + key + '/' if self.base else None
                stored = starch.Package(target, base=base)
                stored.validate()
            except Exception:
                # Remove the partially written package before re-raising.
                rmtree(target)
                raise

            self._log_add_package(key)
            self._callback('ingest', key=key, package=stored)

            return key
示例#5
0
    def location(self, key, path=None):
        """Return a URL for a package directory or a file inside a package.

        Args:
            key: Package key; validated with ``valid_key`` for the
                directory lookup.
            path: Optional path inside the package.

        Returns:
            ``'file://' + <local path>`` if the directory or file exists
            on disk. Otherwise, if ``path`` names a ``'Reference'`` entry
            with a ``url``, that URL is returned; when its scheme is a key
            of ``self.resolutions`` it is rewritten to
            ``'file://' + self.resolutions[scheme] + <rest of the URL>``.
            A reference URL without a ``scheme:`` prefix is returned
            unchanged. ``None`` in every other case.
        """
        directory = self._directory(valid_key(key))
        local = join(directory, valid_path(path)) if path else directory

        if exists(local):
            return 'file://' + local

        package = self.get(key)

        if not (package and path in package):
            return None

        entry = package.get(path)

        if entry['@type'] != 'Reference' or 'url' not in entry:
            return None

        url = entry['url']
        # partition() never raises, so a URL without ':' no longer fails
        # with ValueError; it simply has no scheme to resolve.
        scheme, colon, rest = url.partition(':')

        if colon and scheme in self.resolutions:
            return 'file://' + self.resolutions[scheme] + rest

        return url
示例#6
0
def put_file(key, path):
    """Add an uploaded file or a URL reference to an existing package.

    Query parameters read from ``request.args``:
        type: Entry type, defaults to ``'Resource'``.
        url: URL to record when ``type`` is ``'Reference'``.
        expected_hash: ``MD5:<hex>`` or ``SHA256:<hex>`` of the upload;
            required for every uploaded file.
        replace: The literal string ``'True'`` enables replacement.
        Any other parameter is forwarded to ``p.add`` as a keyword
        argument.

    Args:
        key: Key of the package, opened through ``archive.get`` in
            ``'a'`` mode.
        path: Path of the entry inside the package; also the name under
            which the upload is looked up in ``request.files``.

    Returns:
        ``('done', 204)`` on success, a ``(message, 400)`` tuple for
        request errors, or ``(str(exception), 500)`` if anything raises.
    """
    _check_base(request)

    file_type = str(request.args.get('type', 'Resource'))

    if file_type != 'Reference' and 'expected_hash' not in request.args:
        return 'parameter expected_hash missing', 400

    try:
        p = archive.get(key, mode='a')

        if not p:
            return 'package not found', 400

        path = valid_path(path)
        url = request.args.get('url', None)
        expected_hash = request.args.get('expected_hash', None)
        replace = request.args.get('replace', 'False') == 'True'

        # Parameters consumed by this view are not forwarded to p.add().
        reserved = ('type', 'path', 'replace', 'url', 'expected_hash')
        args = {k: v for k, v in request.args.items() if k not in reserved}

        if url and file_type == 'Reference':
            p.add(path=path, url=url, replace=replace, type=file_type, **args)

            return 'done', 204

        if path not in request.files:
            return 'path (%s) not found in request' % path, 400

        if expected_hash is None:
            # Reached for type=Reference without a url: the early check
            # above only covers non-Reference types. Report a client error
            # instead of failing on None below.
            return 'parameter expected_hash missing', 400

        algorithm = expected_hash.split(':')[0]

        if expected_hash.startswith('MD5:'):
            h = md5()
        elif expected_hash.startswith('SHA256:'):
            h = sha256()
        else:
            return 'unsupported hash function \'%s\'' % algorithm, 400

        upload = request.files[path]

        with NamedTemporaryFile() as staged:
            # The upload is written through a second handle on the same
            # name, hashing each chunk on the way.
            with open(staged.name, 'wb') as o:
                for chunk in iter(lambda: upload.read(100 * 1024), b''):
                    o.write(chunk)
                    h.update(chunk)

            actual_hash = algorithm + ':' + h.hexdigest()
            if actual_hash != expected_hash:
                return 'expected hash %s, got %s' % (expected_hash,
                                                     actual_hash), 400

            # NOTE(review): the package is fetched a second time before
            # adding; presumably to pick up changes made during the
            # upload - confirm before removing.
            p = archive.get(key, mode='a')
            p.add(staged.name,
                  path=path,
                  replace=replace,
                  type=file_type,
                  **args)

        return 'done', 204
    except Exception as e:
        print_exc()
        print(flush=True)

        return str(e), 500
示例#7
0
    def add(self,
            fname=None,
            path=None,
            data=None,
            url=None,
            type='Resource',
            traverse=True,
            check_version=None,
            exclude='^\\..*|^_.*',
            replace=False,
            **kwargs):
        """Add a file, a directory, in-memory data or a URL to the package.

        Content is taken from ``fname``, else from ``data``, else fetched
        from ``url``. A ``url`` combined with ``type == 'Reference'`` (and
        no file or data) is only recorded, not downloaded.

        Args:
            fname: Local file or directory to add.
            path: Path of the entry inside the package; defaults to the
                base name of ``fname``. Validated with ``valid_path``.
            data: Content to store. ``dict`` and ``list`` values are
                serialised to UTF-8 encoded JSON first. Empty content
                (``b''``) is stored as an empty file.
            url: URL to download or to reference; ``file:`` URLs are
                rejected.
            type: Value stored as ``@type`` of the entry.
            traverse: Must be true for ``fname`` to be a directory.
            check_version: If truthy, must equal the package's current
                ``version``.
            exclude: Pattern forwarded to ``_add_directory``.
            replace: Allow overwriting an existing entry or file.
            **kwargs: Extra properties merged into the file entry.

        Raises:
            Exception: For a non-writable package, missing or conflicting
                arguments, ``file:`` URLs, an existing entry without
                ``replace``, a version mismatch, a reserved path, or a
                directory given with ``traverse`` disabled.
        """
        if self._mode not in ['w', 'a']:
            raise Exception('package not writable, open in \'a\' mode')

        if not (fname or (path and (data is not None or url))):
            raise Exception('Specify either file or path and (data or url)')

        if fname and data:
            raise Exception('Specifying both a file and data is not allowed')

        if url and url.startswith('file:'):
            raise Exception('file-URLs not allowed')

        path = valid_path(path or basename(abspath(fname)))

        if path in self and not replace:
            raise Exception(f'file ({path}) already exists, use replace=True')

        f = {'@id': quote(path), '@type': type, 'path': path}

        with self._lock_ctx():
            if check_version and check_version != self._desc['version']:
                raise Exception(
                    f'File changed, new version is {self._desc["version"]}')

            if path in ['_package.json', '_log']:
                raise Exception(f'filename {path} not allowed')

            if isinstance(data, (dict, list)):
                data = dumps(data).encode('utf-8')

            # Empty content is still content: the argument check above
            # accepts it via ``data is not None``, so do the same here.
            has_data = data is not None

            if fname and isdir(fname):
                if not traverse:
                    raise Exception(
                        f'file {fname} is a directory, set traverse=True to add directories'
                    )

                self._add_directory(fname, valid_path(path), exclude=exclude)

                return
            elif fname or has_data or url and type != 'Reference':
                f['urn'] = uuid4().urn

                if url:
                    f['url'] = url

                oname = join(self.root_dir, path)

                if not replace and exists(oname):
                    raise Exception(
                        'file (%s) already exists, use replace=True' % path)

                if not exists(dirname(oname)):
                    makedirs(dirname(oname))

                # Bound before the try block so the cleanup in ``finally``
                # can never hit an unbound name and mask the real error.
                temppath = join(self.root_dir, f'{path}-tmp-{str(random())}')

                try:
                    h = sha256()

                    if fname:
                        source = open(fname, 'rb')
                    elif has_data:
                        source = BytesIO(data)
                    else:
                        source = htopen(url, 'rb')

                    with source as stream, open(temppath, 'wb') as out:
                        for chunk in iter(lambda: stream.read(100 * 1024),
                                          b''):
                            out.write(chunk)
                            h.update(chunk)

                        size = out.tell()

                    f['size'] = size
                    f['checksum'] = 'SHA256:' + h.hexdigest()

                    if path in self and self[path]['checksum'] == f['checksum']:
                        # Same content as the existing entry: keep that
                        # entry; the temporary copy is removed below.
                        f = self[path]
                    else:
                        with Magic(flags=MAGIC_MIME) as m:
                            f['mime_type'] = m.id_filename(temppath).split(
                                ';')[0]

                        move(temppath, oname)
                finally:
                    if exists(temppath):
                        remove(temppath)
            elif url and type == 'Reference':
                f['url'] = url

                if url.startswith('http'):
                    # Best-effort probe for size and MIME type; a failure
                    # is logged and otherwise ignored.
                    try:
                        with get(url,
                                 headers={'Accept-Encoding': 'identity'},
                                 stream=True) as r:
                            if 'Content-Length' in r.headers:
                                f['size'] = int(r.headers['Content-Length'])

                            if 'Content-Type' in r.headers:
                                f['mime_type'] = r.headers[
                                    'Content-Type'].split(';')[0]
                    except Exception:
                        self._log(f'WARN could not probe URL ({url}')
            else:
                raise Exception('Incompatible parameters')

        if type in ['Meta', 'Structure', 'Content']:
            if 'see_also' not in self._desc:
                self._desc['see_also'] = []

            if path not in self._desc['see_also']:
                self._desc['see_also'] += [path]

        f.update(kwargs)

        self._desc['files'][path] = f

        if type != 'Reference':
            self._log(
                f'STORE "{f["urn"]} {path} size:{f["size"]}{(" " + f["mime_type"]) if "mime_type" in f else ""} {f["checksum"]}'
            )
        else:
            self._log(
                f'REF {url} {path}{(" size:" + str(f["size"])) if "size" in f else ""}{(" " + f["mime_type"]) if "mime_type" in f else ""}'
            )

        self.save()