示例#1
0
 def mv(src, out):
     """Append the bytes of temp file *src* to *out*, then delete the source.

     Fires 'begin_move'/'end_move' events around the copy so observers can
     track progress.
     """
     fname = src.name
     self._fire_event('begin_move', fname)
     with fopen(fname, mode='rb') as source:
         # Two-argument iter() yields buf_size-sized chunks until EOF (b'').
         for chunk in iter(lambda: source.read(buf_size), b''):
             out.write(chunk)
             out.flush()
     os.remove(fname)
     self._fire_event('end_move', fname)
示例#2
0
def set_tag_value(filename, name, value):
    """Overwrite the value of tag *name* in slob file *filename* in place.

    Raises UnknownEncoding when the container's declared encoding is not a
    known codec, and TagNotFound when no tag named *name* exists.
    """
    with fopen(filename, 'rb+') as fileobj:
        # Skip the magic bytes and the 16-byte UUID to reach the encoding.
        fileobj.seek(len(MAGIC) + 16)
        encoding = read_byte_string(fileobj, U_CHAR).decode(UTF8)
        if encodings.search_function(encoding) is None:
            raise UnknownEncoding(encoding)
        writer = StructWriter(
            StructReader(fileobj, encoding=encoding), encoding=encoding)
        # Skip the compression name that follows the encoding field.
        writer.read_tiny_text()
        for _ in range(writer.read_byte()):
            if writer.read_tiny_text() == name:
                writer.write_tiny_text(value, editable=True)
                return
            # Not a match: skip this tag's value and continue.
            writer.read_tiny_text()
    raise TagNotFound(name)
示例#3
0
 def _sort(self):
     """Sort collected refs by sort key and rewrite ref positions on disk.

     Reads all refs back through a MultiFileReader, computes the sorted
     order of ref indices, writes the ref positions in that order to a new
     temp file, swaps it in place of the original ref-positions file, and
     reopens it for appending. Fires 'begin_sort'/'end_sort' events.
     """
     self._fire_event('begin_sort')
     f_ref_positions_sorted = self._wbfopen('ref-positions-sorted')
     # Flush/close so MultiFileReader sees all data written so far.
     self.f_refs.flush()
     self.f_ref_positions.close()
     with MultiFileReader(self.f_ref_positions.name, self.f_refs.name) as f:
         ref_list = RefList(f, self.encoding, count=self.ref_count)
         sortkey_func = sortkey(IDENTICAL)
         # Sort indices, not the refs themselves; only positions are moved.
         for i in sorted(range(len(ref_list)),
                         key=lambda j: sortkey_func(ref_list[j].key)):
             ref_pos = ref_list.pos(i)
             f_ref_positions_sorted.write_long(ref_pos)
     f_ref_positions_sorted.close()
     # Replace the unsorted positions file with the sorted one.
     os.remove(self.f_ref_positions.name)
     os.rename(f_ref_positions_sorted.name, self.f_ref_positions.name)
     # Reopen in append mode so later writes land after the sorted data.
     self.f_ref_positions = StructWriter(fopen(self.f_ref_positions.name,
                                               'ab'),
                                         encoding=self.encoding)
     self._fire_event('end_sort')
示例#4
0
 def __init__(self, *args):
     """Present several files as one concatenated, seekable byte stream.

     Each positional argument is either a single file name (str) or an
     iterable of file names. Files are opened in binary mode and each is
     assigned the byte range it occupies in the combined stream.
     """
     filenames = []
     for arg in args:
         if isinstance(arg, str):
             filenames.append(arg)
         else:
             filenames.extend(arg)
     files, ranges = [], []
     offset = 0
     for fname in filenames:
         fsize = os.stat(fname).st_size
         ranges.append(range(offset, offset + fsize))
         files.append(fopen(fname, 'rb'))
         offset += fsize
     self.size = offset
     self._ranges = ranges
     self._files = files
     self._fcount = len(files)
     # Position before the start so the initial seek(0) sets up state.
     self._offset = -1
     self.seek(0)
示例#5
0
 def _wbfopen(self, name):
     """Open *name* inside the temp work dir for binary writing.

     Returns the file wrapped in a StructWriter using this writer's
     encoding.
     """
     path = os.path.join(self.tmpdir.name, name)
     return StructWriter(fopen(path, 'wb'), encoding=self.encoding)
示例#6
0
    def __init__(
        self,
        filename,
        workdir=None,
        encoding=UTF8,
        compression=DEFAULT_COMPRESSION,
        min_bin_size=512 * 1024,
        max_redirects=5,
        observer=None,
    ):
        """Create a new slob container writer.

        :param filename: path of the file to create; must not exist yet.
        :param workdir: directory in which the temporary work directory
            is created (``None`` uses the system default).
        :param encoding: codec name used for text written to the
            container; must be resolvable via :mod:`encodings`.
        :param compression: key into ``COMPRESSIONS`` (``None`` is
            treated as the empty-string / no-compression entry).
        :param min_bin_size: size threshold in bytes; presumably the
            point at which the current bin is flushed — confirm against
            the code that consumes ``self.min_bin_size``.
        :param max_redirects: when non-zero, an auxiliary alias
            ``Writer`` is created to record redirects.
        :param observer: optional event callback stored on the instance.
        :raises SystemExit: if *filename* already exists.
        :raises UnknownEncoding: if *encoding* is not a known codec.
        :raises UnknownCompression: if *compression* is unrecognized.
        """
        self.filename = filename
        self.observer = observer
        if os.path.exists(self.filename):
            raise SystemExit('File %r already exists' % self.filename)

        # make sure we can write
        with fopen(self.filename, 'wb'):
            pass

        self.encoding = encoding

        # Fail fast if the codec name cannot be resolved.
        if encodings.search_function(self.encoding) is None:
            raise UnknownEncoding(self.encoding)

        self.workdir = workdir

        # All intermediate files live here; removed in finalize().
        self.tmpdir = tmpdir = tempfile.TemporaryDirectory(
            prefix='{0}-'.format(os.path.basename(filename)), dir=workdir)

        # Temporary section files that are concatenated into the final
        # container during finalize().
        self.f_ref_positions = self._wbfopen('ref-positions')
        self.f_store_positions = self._wbfopen('store-positions')
        self.f_refs = self._wbfopen('refs')
        self.f_store = self._wbfopen('store')

        self.max_redirects = max_redirects
        if max_redirects:
            # Aliases are collected by a nested Writer with redirects
            # disabled to avoid recursion.
            self.aliases_path = os.path.join(tmpdir.name, 'aliases')
            self.f_aliases = Writer(
                self.aliases_path,
                workdir=tmpdir.name,
                max_redirects=0,
                compression=None,
            )

        # Normalize None to the empty-string compression key.
        if compression is None:
            compression = ''
        if compression not in COMPRESSIONS:
            raise UnknownCompression(compression)
        else:
            self.compress = COMPRESSIONS[compression].compress

        self.compression = compression
        self.content_types = {}

        self.min_bin_size = min_bin_size

        self.current_bin = None

        self.blob_count = 0
        self.ref_count = 0
        self.bin_count = 0
        # Provenance tags recorded in the container header.
        self._tags = {
            'version.python': sys.version.replace('\n', ' '),
            'version.pyicu': icu.VERSION,
            'version.icu': icu.ICU_VERSION,
            'created.at': datetime.now(timezone.utc).isoformat()
        }
        # Read-only public view; mutate via self._tags internally.
        self.tags = MappingProxyType(self._tags)
示例#7
0
    def finalize(self):
        """Assemble the final slob file and clean up temporary state.

        Flushes any pending bin, sorts refs, resolves aliases (when
        redirects are enabled), then writes the container header — magic
        bytes, UUID, encoding, compression, tags, content types, blob
        count and section offsets — appends the contents of the
        temporary ref/store files, and removes the temp directory.
        Fires 'begin_finalize'/'end_finalize' events.
        """
        self._fire_event('begin_finalize')
        if self.current_bin is not None:
            self._write_current_bin()

        self._sort()
        if self.max_redirects:
            self._resolve_aliases()

        # Temp files in the order they are appended to the output below.
        files = (
            self.f_ref_positions,
            self.f_refs,
            self.f_store_positions,
            self.f_store,
        )

        for f in files:
            f.close()

        buf_size = 10 * 1024 * 1024  # copy in 10 MiB chunks

        with fopen(self.filename, mode='wb') as output_file:
            out = StructWriter(output_file, self.encoding)
            out.write(MAGIC)
            out.write(uuid4().bytes)
            # Encoding name is itself written in UTF-8 so readers can
            # decode it before knowing the container encoding.
            out.write_tiny_text(self.encoding, encoding=UTF8)
            out.write_tiny_text(self.compression)

            def write_tags(tags, f):
                # One count byte, then (key, editable value) pairs.
                f.write(pack(U_CHAR, len(tags)))
                for key, value in tags.items():
                    f.write_tiny_text(key)
                    f.write_tiny_text(value, editable=True)

            write_tags(self.tags, out)

            def write_content_types(content_types, f):
                # Content types are written ordered by their assigned id
                # so ids remain valid indexes on read.
                count = len(content_types)
                f.write(pack(U_CHAR, count))
                types = sorted(content_types.items(), key=lambda x: x[1])
                for content_type, _ in types:
                    f.write_text(content_type)

            write_content_types(self.content_types, out)

            out.write_int(self.blob_count)
            # Offset of the store section: everything written before it,
            # including this value itself and the fields noted below.
            store_offset = (
                out.tell() + U_LONG_LONG_SIZE +  # this value
                U_LONG_LONG_SIZE +  # file size value
                U_INT_SIZE +  # ref count value
                os.stat(self.f_ref_positions.name).st_size +
                os.stat(self.f_refs.name).st_size)
            out.write_long(store_offset)
            out.flush()

            file_size = (
                out.tell() +  # bytes written so far
                U_LONG_LONG_SIZE +  # file size value
                2 * U_INT_SIZE  # ref count and bin count
            )
            file_size += sum((os.stat(f.name).st_size for f in files))
            out.write_long(file_size)

            def mv(src, out):
                # Append the temp file's bytes to the output, then
                # delete the source; events bracket each move.
                fname = src.name
                self._fire_event('begin_move', fname)
                with fopen(fname, mode='rb') as f:
                    while True:
                        data = f.read(buf_size)
                        if len(data) == 0:
                            break
                        out.write(data)
                        out.flush()
                os.remove(fname)
                self._fire_event('end_move', fname)

            out.write_int(self.ref_count)
            mv(self.f_ref_positions, out)
            mv(self.f_refs, out)

            out.write_int(self.bin_count)
            mv(self.f_store_positions, out)
            mv(self.f_store, out)

        self.tmpdir.cleanup()
        self._fire_event('end_finalize')