Example #1
 def wrap_fp(fp):
     if suffix == ".gz":
         fp = GzipFile(fileobj=fp, mode=mode)
     elif suffix == ".bz2":
         try:
             fp = BZ2File(fp, mode=mode)
         except TypeError:
             if sys.version_info < (3, 0, 0):
                 raise NotImplementedError(
                     "built-in BZ2File is partially broken in python 2, install bz2file from pypi or use a compression setting other than 'bz2'"
                 )
             else:
                 raise
     elif suffix == ".xz":
         fp = LZMAFile(fp, mode=mode)
     if (suffix or sys.version_info < (3, )) and "b" not in mode:
         # If mode is not binary (and we expect to be able to
         # write() str values, not bytes), we need to create
         # an additional encoding wrapper. That encoder can
         # probably use UTF-8 without any need for additional
         # configuration
         if "r" in mode and "w" in mode:
             fp = StreamReaderWriter(fp, codecs.getreader("utf-8"),
                                     codecs.getwriter("utf-8"))
         elif "w" in mode:
             fp = codecs.getwriter("utf-8")(fp)
         elif suffix:
             fp = codecs.getreader("utf-8")(fp)
     fp.realname = filename
     return fp
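wrap_fp reads suffix, mode, and filename from an enclosing scope that this snippet omits. A minimal sketch of what that scope might look like (the open_any name and its signature are assumptions, not part of the original source):

import os

def open_any(filename, mode="r"):
    # suffix and mode become closure variables for wrap_fp
    suffix = os.path.splitext(filename)[1]

    def wrap_fp(fp):
        ...  # body exactly as in the example above

    # Open the raw file in binary; wrap_fp layers decompression and,
    # for text modes, a UTF-8 codec wrapper on top.
    return wrap_fp(open(filename, mode if "b" in mode else mode + "b"))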
Example #2
def buildhtml(
    trace,
    layout,
    template = join(dirname(util.__file__),"libs/sankey.js.xz"),
    outpath = join(gettempdir(), "temp-plot.html"),
    updatejs = False
    ):

    js = join(dirname(outpath),"sankey.js")
    if updatejs or not os.path.exists(js):
        with LZMAFile(template, 'rb') as fp, open(js, "wb") as wp:
            wp.write(fp.read())

    html = tmpl.format(
        trace=json.dumps(trace),
        layout=json.dumps(layout)
    )

    with open(outpath, "w") as fp:
        fp.write(html)

    if os.name == "nt":
        code, dat = getstatusoutput("start " + outpath)
        if code != 0:
            raise RuntimeError(dat)
    else:
        print("output: " + outpath)
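Note that tmpl is a module-level HTML template string the snippet does not show. The Windows branch shells out to the cmd builtin "start", which only works because getstatusoutput runs its command through a shell; a more direct stdlib equivalent, if that branch were reworked, is os.startfile (Windows-only):

import os

if os.name == "nt":
    os.startfile(outpath)  # hand the path to the default .html handler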
Example #3
 def __load_all_messages(self, author):
     """
     Note: If the quotes file doesn't exist (can happen) this will throw.
     """
     with LZMAFile(self.__quotes_file_name(author), 'r') as f:
         lines = f.read().decode('utf-8').split('\n')
         return [self.__remove_mentions(self.__unescape_message(line)) for line in lines]
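As the docstring warns, a missing quotes file raises (LZMAFile propagates the FileNotFoundError from the underlying open). A hedged sketch of how a caller inside the same class might guard that:

try:
    lines = self.__load_all_messages(author)
except FileNotFoundError:
    lines = []  # no quotes recorded for this author yet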
Example #4
def _get_checkpoint_dir():
    from appdirs import AppDirs
    dirs = AppDirs(appname="nimare", appauthor="neurostuff", version="1.0")
    checkpoint_dir = os.path.join(dirs.user_data_dir, "ohbm2018_model")
    if not os.path.exists(checkpoint_dir):
        LGR.info('Downloading the model (this is a one-off operation)...')
        url = "https://zenodo.org/record/1257721/files/ohbm2018_model.tar.xz?download=1"
        # Streaming, so we can iterate over the response.
        r = requests.get(url, stream=True)
        f = BytesIO()

        # Total size in bytes.
        total_size = int(r.headers.get('content-length', 0))
        block_size = 1024 * 1024
        wrote = 0
    for data in tqdm(r.iter_content(block_size), total=math.ceil(total_size / block_size),
                         unit='MB', unit_scale=True):
            wrote = wrote + len(data)
            f.write(data)
        if total_size != 0 and wrote != total_size:
            raise Exception("Download interrupted")

        f.seek(0)
        LGR.info('Uncompressing the model to %s...', checkpoint_dir)
        tarfile = TarFile(fileobj=LZMAFile(f), mode="r")
        tarfile.extractall(dirs.user_data_dir)
    return checkpoint_dir
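The TarFile(fileobj=LZMAFile(f)) pairing can also be written with tarfile's built-in xz support, which performs the LZMA decompression internally; an equivalent sketch for the extraction step (the same substitution applies to the similar download functions in Examples #16 and #26):

import tarfile

f.seek(0)
with tarfile.open(fileobj=f, mode="r:xz") as tar:
    tar.extractall(dirs.user_data_dir)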
Example #5
def test_wikipedia_export():
    @xml_handle_element("mediawiki", "page", "revision")
    class Revision:
        def __init__(self):
            self.author = None
            self.date = None

        @xml_handle_text("contributor", "username")
        def handle_author(self, node):
            self.author = node.text

        @xml_handle_text("timestamp")
        def handle_date(self, node):
            self.date = datetime.strptime(node.text, "%Y-%m-%dT%H:%M:%SZ")

    with LZMAFile(Path(__file__).parent / "wikipedia_python_export.xml.xz") as stream:
        items = list(Parser(stream).iter_from(Revision))
        assert len(items) == 1000
        assert all(isinstance(item, Revision) for item in items)
        revision = items[-1]
        assert revision.author == "Lulu of the Lotus-Eaters"
        assert revision.date.year == 2006
        assert revision.date.month == 4
        assert revision.date.day == 14
        assert revision.date.hour == 15
        assert revision.date.minute == 58
Example #6
    async def heh_reload(self, message, args):
        log_dir = join(self.local_data_dir, 'log')
        files = [join(log_dir, f) for f in listdir(log_dir) if isfile(join(log_dir, f))]
        self.db = {'users': dict()}

        files_processed = 0
        for i, f in enumerate(sorted(files)):
            match = re.match(r'^.*/chanlog-([0-9]+-[0-9]+-[0-9]+)\.txt\.xz$', f)
            if match is None:
                continue
            print(f)

            try:
                for line in LZMAFile(f, 'r'):
                    # parse the message into its components (author, timestamps, channel, etc.)
                    m = Message(line.decode('utf-8'))
                    self.__update_db(m.author, m.author_id, m.message)
            except EOFError:  # The latest log file may be open
                pass

            # may take a while, yield every so often
            files_processed += 1
            if files_processed % 10 == 0:
                await asyncio.sleep(0)

        self.__save_db()
        await self.client.send_message(message.channel, 'Done!')
Example #7
    def read_from_file(file, hash_size, compressed = True):
        if compressed:
            with LZMAFile(file, "rb") as lfile:
                tree = HashTree.deserialize_from_bitstream(lfile, hash_size)
        else:
            tree = HashTree.deserialize_from_bitstream(file, hash_size)

        return tree
Example #8
 def __open_new_log_if_necessary(self):
     date = datetime.now().strftime('%Y-%m-%d')
     if self.date != date:
         self.log_file.close()
         self.date = date
         self.log_file = LZMAFile(
             os.path.join(self.log_path,
                          'chanlog-{}.txt.xz'.format(self.date)), 'a')
Example #9
def load_fgd() -> FGD:
    """Extract the local copy of FGD data.

    This allows the analysis to not depend on local files.
    """

    from lzma import LZMAFile
    with LZMAFile(open_binary(srctools, 'fgd.lzma')) as f:
        return FGD.unserialise(f)
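Example #25 below shows the writing side of this pair (fgd.serialise into an LZMAFile). A hedged usage sketch of the loader, using the entity attributes that appear in Example #29:

fgd = load_fgd()
for ent in fgd.entities.values():
    print(ent.classname, ent.type)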
Example #10
    def generate_distfile(self):
        hexsha = self._repo.head.commit.hexsha
        filename = poudriere.DISTFILES / '{}.txz'.format(hexsha)

        if not filename.exists():
            with LZMAFile(filename, 'w', preset=9,
                          format=FORMAT_XZ, check=CHECK_SHA256) as fp:
                self._repo.archive(fp, hexsha)

        return filename
Example #11
    def __init__(self, server_instance, full_name):
        super(Log, self).__init__(server_instance, full_name)

        self.log_path = os.path.join(self.local_data_dir, 'log')
        if not os.path.exists(self.log_path):
            os.makedirs(self.log_path)

        self.date = datetime.now().strftime('%Y-%m-%d')
        self.log_file = LZMAFile(
            os.path.join(self.log_path, 'chanlog-{}.txt.xz'.format(self.date)),
            'a')
Example #12
    def __init__(self, file: BinaryIO, feed_options: Dict[str, Any]) -> None:
        self.file = file
        self.feed_options = feed_options

        format = self.feed_options.get("lzma_format")
        check = self.feed_options.get("lzma_check", -1)
        preset = self.feed_options.get("lzma_preset")
        filters = self.feed_options.get("lzma_filters")
        self.lzmafile = LZMAFile(filename=self.file,
                                 mode="wb",
                                 format=format,
                                 check=check,
                                 preset=preset,
                                 filters=filters)
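A sketch of the feed_options mapping this constructor expects; the key names come from the snippet itself, and the values are ordinary constants from the lzma module (any key may be omitted, with -1 serving as the "default check" sentinel that LZMAFile accepts):

from lzma import FORMAT_XZ, CHECK_CRC64

feed_options = {
    "lzma_format": FORMAT_XZ,
    "lzma_check": CHECK_CRC64,
    "lzma_preset": 6,
}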
Example #13
def _read_bytes_xz(filename: str) -> BufferedIOBase:
    '''
    Open and return a filehandle suitable for reading the binary LZMA-archived
    file with the passed filename.

    This private function is intended to be called *only* by the public
    :func:`reading_bytes` function.
    '''

    # This optional stdlib module is guaranteed to exist and hence be safely
    # importable here, due to the above die_unless_filetype() call.
    from lzma import LZMAFile

    # Open and return a filehandle suitable for reading this file.
    return LZMAFile(filename, mode='rb')
Example #14
def download_frida_server(url, version, fname):
    """Download frida-server-binary."""
    try:
        download_dir = Path(settings.DWD_DIR)
        logger.info('Downloading binary %s', fname)
        dwd_loc = download_dir / fname
        with requests.get(url, stream=True) as r:
            with LZMAFile(r.raw) as f:
                with open(dwd_loc, 'wb') as flip:
                    copyfileobj(f, flip)
        clean_up_old_binaries(download_dir, version)
        return True
    except Exception:
        logger.exception('[ERROR] Downloading Frida Server Binary')
    return False
Example #15
def new_state_sender(filename=None):
    from lzma import LZMAFile
    socket_ = None
    if filename is not None:
        fsfile_ = LZMAFile(filename, 'wb')
    else:
        fsfile_ = _NullFile()
    file_ = None
    try:
        socket_ = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        socket_.connect(os.environ['STBT_TRACING_SOCKET'])
        file_ = _SocketAndFileWriter(socket_, fsfile_)
    except (KeyError, socket.error):
        file_ = fsfile_
    return StateSender(file_)
Example #16
def download_peaks2maps_model(data_dir=None, overwrite=False, verbose=1):
    """
    Download the trained Peaks2Maps model from OHBM 2018.
    """
    url = "https://zenodo.org/record/1257721/files/ohbm2018_model.tar.xz?download=1"

    temp_dataset_name = 'peaks2maps_model_ohbm2018__temp'
    temp_data_dir = _get_dataset_dir(temp_dataset_name, data_dir=data_dir, verbose=verbose)

    dataset_name = 'peaks2maps_model_ohbm2018'
    data_dir = temp_data_dir.replace(temp_dataset_name, dataset_name)

    desc_file = op.join(data_dir, 'description.txt')
    if op.isfile(desc_file) and overwrite is False:
        shutil.rmtree(temp_data_dir)
        return data_dir

    LGR.info('Downloading the model (this is a one-off operation)...')
    # Streaming, so we can iterate over the response.
    r = requests.get(url, stream=True)
    f = BytesIO()

    # Total size in bytes.
    total_size = int(r.headers.get('content-length', 0))
    block_size = 1024 * 1024
    wrote = 0
    for data in tqdm(r.iter_content(block_size), total=math.ceil(total_size / block_size),
                     unit='MB', unit_scale=True):
        wrote = wrote + len(data)
        f.write(data)
    if total_size != 0 and wrote != total_size:
        raise Exception("Download interrupted")

    f.seek(0)
    LGR.info('Uncompressing the model to {}...'.format(temp_data_dir))
    tarfile = TarFile(fileobj=LZMAFile(f), mode="r")
    tarfile.extractall(temp_data_dir)

    os.rename(op.join(temp_data_dir, 'ohbm2018_model'), data_dir)
    shutil.rmtree(temp_data_dir)

    with open(desc_file, 'w') as fo:
        fo.write('The trained Peaks2Maps model from OHBM 2018.')

    if verbose > 0:
        print('\nDataset moved to {}\n'.format(data_dir))

    return data_dir
Example #17
def _write_bytes_xz(filename: str, is_overwritable: bool) -> BufferedIOBase:
    '''
    Open and return a filehandle suitable for writing the binary LZMA-archived
    file with the passed filename.

    This private function is intended to be called *only* by the public
    :func:`writing_bytes` function.
    '''

    # Avoid circular import dependencies.
    from betse.util.io import iofiles

    # This optional stdlib module is guaranteed to exist and hence be safely
    # importable here, due to the above die_unless_filetype() call.
    from lzma import LZMAFile

    # Open and return a filehandle suitable for e(x)clusively writing this file.
    return LZMAFile(
        filename, mode=iofiles.get_mode_write_bytes(is_overwritable))
Example #18
    def __init__(
        self,
        path: Path,
        *,
        encoding: str,
        warn_uncompressed: bool = True,
        progress_bar: bool = False,
        progress_bar_desc: Optional[str] = None,
    ):
        self.path = path

        self._fp = path.open("rb")
        self._fin: BinaryIO
        if path.suffix == ".gz":
            self._fin = cast(BinaryIO, GzipFile(fileobj=self._fp))
        elif path.suffix == ".bz2":
            self._fin = cast(BinaryIO, BZ2File(self._fp))
        elif path.suffix == ".xz":
            self._fin = cast(BinaryIO, LZMAFile(self._fp))
        elif path.suffix == ".zst":
            self._fin = cast(BinaryIO,
                             ZstdDecompressor().stream_reader(self._fp))
        else:
            if warn_uncompressed:  # pragma: no cover
                _LOGGER.warning(
                    "Could not detect compression type of file '{}' from its "
                    "extension, treating as uncompressed file.",
                    path,
                )
            self._fin = self._fp

        self._progress_bar: Optional[tqdm[None]] = None
        if progress_bar:
            self._progress_bar = tqdm(
                desc=progress_bar_desc or self.path.name,
                total=self.size(),
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
                dynamic_ncols=True,
            )

        super().__init__(self._fin, encoding=encoding)
Example #19
def new_state_sender(filename=None):
    try:
        from lzma import LZMAFile
    except ImportError:
        from backports.lzma import LZMAFile  # pylint:disable=E0611,F0401

    socket_ = None
    if filename is not None:
        fsfile_ = LZMAFile(filename, 'wb')
    else:
        fsfile_ = _NullFile()
    file_ = None
    try:
        socket_ = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        socket_.connect(os.environ['STBT_TRACING_SOCKET'])
        file_ = _SocketAndFileWriter(socket_, fsfile_)
    except (KeyError, socket.error):
        file_ = fsfile_  # pylint:disable=redefined-variable-type
    return StateSender(file_)
Example #20
    def z_handler(self, dbfile):
        """
        If the database file is compressed, uncompresses it and returns the
        filename of the uncompressed file.
        
        @param dbfile: the name of the file
        @type  dbfile: str
        
        @return: the name of the uncompressed file
        @rtype:  str
        """
        (junk, ext) = os.path.splitext(dbfile)

        if ext == '.bz2':
            from bz2 import BZ2File
            zfd = BZ2File(dbfile)
        elif ext == '.gz':
            from gzip import GzipFile
            zfd = GzipFile(dbfile)
        elif ext == '.xz':
            from lzma import LZMAFile
            zfd = LZMAFile(dbfile)
        else:
            # not compressed (or something odd)
            return dbfile

        import tempfile
        (unzfd, unzname) = tempfile.mkstemp('.repoview')
        os.close(unzfd)  # mkstemp hands back a raw OS-level fd; close it before reopening
        self.cleanup.append(unzname)

        unzfd = open(unzname, 'wb')  # binary mode: zfd.read() yields bytes

        while True:
            data = zfd.read(16384)
            if not data:
                break
            unzfd.write(data)
        zfd.close()
        unzfd.close()

        return unzname
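The three import-and-wrap branches can be collapsed with the stdlib's uniform open helpers, which return the same file-object types in binary mode; a compact alternative sketch using the same extension-based dispatch:

import bz2, gzip, lzma

openers = {'.bz2': bz2.open, '.gz': gzip.open, '.xz': lzma.open}
opener = openers.get(ext)
if opener is None:
    return dbfile  # not compressed (or something odd)
zfd = opener(dbfile, 'rb')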
Example #21
    def _readControl(self):
        ar = arpy.Archive(self.filename)
        ar.read_all_headers()

        if b'control.tar.xz' in ar.archived_files:
            tar = LZMAFile(filename=ar.archived_files[b'control.tar.xz'])
            # NOTE: this requires https://github.com/viraptor/arpy/pull/5

        elif b'control.tar.gz' in ar.archived_files:
            tar = GzipFile(fileobj=ar.archived_files[b'control.tar.gz'])

        else:
            raise ValueError('Unable to find control file')

        raw = TarFile(fileobj=tar)

        control = raw.extractfile('./control').read()
        raw.close()
        tar.close()
        ar.close()

        return control
Example #22
    def __init__(self, path):
        if path.endswith('.gz'):
            self.fd = TarFile.gzopen(path)
        elif path.endswith('.xz'):
            self.fd = TarFile.open(fileobj=LZMAFile(path))
        else:
            raise Exception('Unsupported file type %s' % path)

        self.pkg_info = defaultdict(list)
        self.members = []

        # Extract the most commonly used information

        if self.parse_pkginfo():
            self.parse_contents()

        self.name = self.pkg_info.get('pkgname')
        self.desc = self.pkg_info.get('pkgdesc')[0]
        self.depends = self.pkg_info.get('depend') or []
        self.groups = self.pkg_info.get('group') or []

        if isinstance(self.name, (list, tuple)) and len(self.name) == 1:
            self.name = self.name[0]
Example #23
    async def reprocess_cache(self, message, matches):
        # Check if cache is up to date
        date = datetime.now().strftime('%Y-%m-%d')
        if self.cache is not None and self.cache['date'] == date:
            return ()

        # Get list of all channel log files
        files = [join(self.log_dir, f) for f in listdir(self.log_dir) if isfile(join(self.log_dir, f))]
        self.cache = dict()
        self.cache['date'] = datetime.now().strftime('%Y-%m-%d')
        authors = dict()
        total_days = dict()  # Keep track of how many days a user has existed for, so we can calculate averages

        # We don't want to process the last log file, because it doesn't contain a full day's worth of info
        #                                  vvvvv
        for i, f in enumerate(sorted(files)[:-1]):
            match = re.match(r'^.*/chanlog-([0-9]+-[0-9]+-[0-9]+)\.txt\.xz$', f)
            if match is None:
                continue
            print(f)
            log_stamp = strptime(match.group(1), '%Y-%m-%d')

            # Update the total days counter of all users we've seen so far
            for author in total_days:
                total_days[author] += 1

            # Update cycle counters to the current day
            for k, v in authors.items():
                v['day_cycle_acc_day'].appendleft([0]*24)
                v['day_cycle_acc_week'].appendleft([0]*24)
                v['commands_acc'].appendleft(0)

            try:
                for line in LZMAFile(f, 'r'):
                    # parse the message into its components (author, timestamps, channel, etc.)
                    m = Message(line.decode('utf-8'))

                    # create an entry in the top-level "authors" dict in the cache structure, if not already there
                    if m.author_id not in authors:
                        authors[m.author_id] = new_author_dict(m.author)
                        authors[m.author_id]['userId'] = m.author_id
                        authors[m.author_id]['day_cycle_acc'] = [0]*24
                        authors[m.author_id]['day_cycle_acc_day'] = deque([[0]*24], maxlen=1)
                        authors[m.author_id]['day_cycle_acc_week'] = deque([[0]*24], maxlen=7)
                        authors[m.author_id]['commands_acc'] = deque([0], maxlen=7)
                        total_days[m.author_id] = 1

                    a = authors[m.author_id]

                    # keep track of the total message count
                    a['messages_total'] += 1

                    # See if message contains any commands
                    command_count = len(get_commands_from_message(m.message))
                    a['commands_total'] += command_count
                    a['commands_acc'][0] += command_count

                    # Accumulate message count cycles for later averaging
                    a['day_cycle_acc'][int(m.stamp.tm_hour)] += 1
                    a['day_cycle_acc_day'][0][int(m.stamp.tm_hour)] += 1
                    a['day_cycle_acc_week'][0][int(m.stamp.tm_hour)] += 1

                    # count messages per channel
                    a['channels'][m.channel] = a['channels'].get(m.channel, 0) + 1

                    # count how many messages the user makes for every day
                    key = time.mktime(log_stamp)
                    a['messages_per_day'][key] = a['messages_per_day'].get(key, 0) + 1
            except Exception:  # skip log files that fail to decompress or parse
                continue

            # This process does take some time, so yield after processing every file
            await asyncio.sleep(0)

        server_stats = new_author_dict('Server')

        def sum_lists(a, b):
            return [float(sum(x)) for x in zip(*[a, b])]
        def add_dicts(a, b):
            return {x: a.get(x, 0) + b.get(x, 0) for x in set(a).union(b)}

        for author, a in authors.items():
            # Calculate average day cycle using the accumulated cycle
            for i, v in enumerate(a['day_cycle_acc']):
                a['day_cycle_avg'][i] = float(v / total_days[author])
            # There are 7 lists of day cycles that need to be added up, then divided by 7
            a['day_cycle_avg_week'] = [float(sum(x)/7.0) for x in zip(*a['day_cycle_acc_week'])]
            # Days are easier, just use the first (and only) item
            a['day_cycle_avg_day'] = [float(x) for x in a['day_cycle_acc_day'][0]]
            a['messages_last_week'] = int(sum(sum(x) for x in zip(*a['day_cycle_acc_week'])))
            a['commands_last_week'] = int(sum(a['commands_acc']))

            # Accumulate all of these stats into the server stats
            server_stats['messages_total'] += a['messages_total']
            server_stats['messages_last_week'] += a['messages_last_week']
            server_stats['commands_total'] += a['commands_total']
            server_stats['day_cycle_avg'] = sum_lists(server_stats['day_cycle_avg'], a['day_cycle_avg'])
            server_stats['day_cycle_avg_week'] = sum_lists(server_stats['day_cycle_avg_week'], a['day_cycle_avg_week'])
            server_stats['day_cycle_avg_day'] = sum_lists(server_stats['day_cycle_avg_day'], a['day_cycle_avg_day'])
            server_stats['channels'] = add_dicts(server_stats['channels'], a['channels'])
            server_stats['messages_per_day'] = add_dicts(server_stats['messages_per_day'], a['messages_per_day'])

            # Delete the temporary keys before saving
            del a['day_cycle_acc']
            del a['day_cycle_acc_day']
            del a['day_cycle_acc_week']
            del a['commands_acc']

        # Finally, save cache
        self.cache['server'] = server_stats
        self.cache['authors'] = authors
        save_json_compressed(self.cache_file, self.cache)
        return ()
Example #24
failed_members = set()
for server_id in os.listdir("data"):
    if not os.path.isdir(os.path.join("data", server_id)):
        print("skipping file " + os.path.join("data", server_id))
        continue
    if server_id not in info:
        print("Server with ID {} was not found in dumpservers.json.xz file! Skipping...".format(server_id))
        continue
    log_dir = os.path.join("data", server_id, "log2")
    if not os.path.isdir(log_dir):
        print("Server \"{}\" has no logs! Skipping...".format(info[server_id]["name"]))
        continue

    for log_file_name in sorted(os.listdir(log_dir)):
        print("Processing file {} on server {}".format(log_file_name, info[server_id]["name"]))
        log_data = LZMAFile(os.path.join(log_dir, log_file_name), 'r').read().decode('utf-8')
        new_log_file = LZMAFile(os.path.join(log_dir, log_file_name), 'w')
        for line in log_data.split('\n'):
            if not line:
                continue
            m = Message(line)

            if int(m.author_id) == 0:
                for id, member in info[server_id]["members"].items():
                    if m.author == member["name"]:
                        m.author_id = id
                        break
                else:
                    failed_members.add(m.author)

            log_msg = u'[{0}] {1}({2}): {3}: {4}({5}): {6}\n'.format(
Example #25
"""Compile the files in the fgd/ folder into a binary blob."""
from srctools import FGD
from srctools.filesys import RawFileSystem

from lzma import LZMAFile

fgd = FGD()

with RawFileSystem('fgd/') as fs:
    for file in fs:
        fgd.parse_file(fs, file)

with open('srctools/fgd.lzma', 'wb') as f:
    with LZMAFile(f, mode='w') as cf:
        fgd.serialise(cf)
Example #26
def download_peaks2maps_model(data_dir=None, overwrite=False, verbose=1):
    """Download the trained Peaks2Maps model from OHBM 2018.

    .. versionadded:: 0.0.2

    Parameters
    ----------
    data_dir : None or str, optional
        Where to put the trained model.
        If None, then download to the automatic NiMARE data directory.
        Default is None.
    overwrite : bool, optional
        Whether to overwrite an existing model or not. Default is False.
    verbose : int, optional
        Verbosity level. Default is 1.

    Returns
    -------
    data_dir : str
        Path to folder containing model.
    """
    url = "https://zenodo.org/record/1257721/files/ohbm2018_model.tar.xz?download=1"

    temp_dataset_name = "peaks2maps_model_ohbm2018__temp"
    data_dir = _get_dataset_dir("", data_dir=data_dir, verbose=verbose)
    temp_data_dir = _get_dataset_dir(temp_dataset_name,
                                     data_dir=data_dir,
                                     verbose=verbose)

    dataset_name = "peaks2maps_model_ohbm2018"
    if dataset_name not in data_dir:  # allow data_dir to include model folder
        data_dir = temp_data_dir.replace(temp_dataset_name, dataset_name)

    desc_file = op.join(data_dir, "description.txt")
    if op.isfile(desc_file) and overwrite is False:
        shutil.rmtree(temp_data_dir)
        return data_dir

    LGR.info("Downloading the model (this is a one-off operation)...")
    # Streaming, so we can iterate over the response.
    r = requests.get(url, stream=True)
    f = BytesIO()

    # Total size in bytes.
    total_size = int(r.headers.get("content-length", 0))
    block_size = 1024 * 1024
    wrote = 0
    for data in tqdm(
            r.iter_content(block_size),
            total=np.ceil(total_size / block_size),
            unit="MB",
            unit_scale=True,
    ):
        wrote = wrote + len(data)
        f.write(data)
    if total_size != 0 and wrote != total_size:
        raise Exception("Download interrupted")

    f.seek(0)
    LGR.info(f"Uncompressing the model to {temp_data_dir}...")
    tf_file = tarfile.TarFile(fileobj=LZMAFile(f), mode="r")
    tf_file.extractall(temp_data_dir)

    os.rename(op.join(temp_data_dir, "ohbm2018_model"), data_dir)
    shutil.rmtree(temp_data_dir)

    with open(desc_file, "w") as fo:
        fo.write("The trained Peaks2Maps model from OHBM 2018.")

    if verbose > 0:
        print(f"\nDataset moved to {data_dir}\n")

    return data_dir
Example #27
 def __append_message(self, author, message):
     with LZMAFile(self.__quotes_file_name(author), 'a') as f:
         message = self.__escape_message(message) + '\n'
         f.write(message.encode('utf-8'))
Example #28
 def _decompress(cls, stream):
     return LZMAFile(stream, mode="r")
Example #29
def action_export(
    dbase: Path,
    extra_db: Optional[Path],
    tags: FrozenSet[str],
    output_path: Path,
    as_binary: bool,
    engine_mode: bool,
) -> None:
    """Create an FGD file using the given tags."""

    if engine_mode:
        tags = frozenset({'ENGINE'})
    else:
        tags = expand_tags(tags)

    print('Tags expanded to: {}'.format(', '.join(tags)))

    fgd, base_entity_def = load_database(dbase, extra_db)

    if engine_mode:
        # In engine mode, we don't care about specific games.
        print('Collapsing bases...')
        fgd.collapse_bases()

        # Cache these constant sets.
        tags_empty = frozenset('')
        tags_not_engine = frozenset({'-ENGINE', '!ENGINE'})

        print('Merging tags...')
        for ent in fgd:
            # If it's set as not in engine, skip.
            if not tags_not_engine.isdisjoint(get_appliesto(ent)):
                continue
            # Strip applies-to helper and ordering helper.
            ent.helpers[:] = [
                helper for helper in ent.helpers if not helper.IS_EXTENSION
            ]
            # Force everything to inherit from CBaseEntity, since
            # we're then removing any KVs that are present on that.
            if ent.classname != BASE_ENTITY:
                ent.bases = [base_entity_def]

            value: Union[IODef, KeyValues]
            category: Dict[str, Dict[FrozenSet[str], Union[IODef, KeyValues]]]
            base_cat: Dict[str, Dict[FrozenSet[str], Union[IODef, KeyValues]]]
            for attr_name in ['inputs', 'outputs', 'keyvalues']:
                category = getattr(ent, attr_name)
                base_cat = getattr(base_entity_def, attr_name)
                # For each category, check for what value we want to keep.
                # If only one, we keep that.
                # If there's an "ENGINE" tag, that's specifically for us.
                # Otherwise, warn if there's a type conflict.
                # If the final value is choices, warn too (not really a type).
                for key, orig_tag_map in list(category.items()):
                    # Remake the map, excluding non-engine tags.
                    # If any are explicitly matching us, just use that
                    # directly.
                    tag_map = {}
                    for tags, value in orig_tag_map.items():
                        if 'ENGINE' in tags or '+ENGINE' in tags:
                            if value.type is ValueTypes.CHOICES:
                                raise ValueError(
                                    '{}.{}: Engine tags cannot be '
                                    'CHOICES!'.format(ent.classname, key))
                            # Use just this.
                            tag_map = {'': value}
                            break
                        elif '-ENGINE' not in tags and '!ENGINE' not in tags:
                            tag_map[tags] = value

                    if not tag_map:
                        # All were set as non-engine, so it's not present.
                        del category[key]
                        continue
                    elif len(tag_map) == 1:
                        # Only one type, that's the one for the engine.
                        [value] = tag_map.values()
                    else:
                        # More than one tag.
                        # IODef and KeyValues have a type attr.
                        types = {val.type for val in tag_map.values()}
                        if len(types) > 1:
                            print('{}.{} has multiple types! ({})'.format(
                                ent.classname, key,
                                ', '.join([typ.value for typ in types])))
                        # Pick the one with shortest tags arbitrarily.
                        _, value = min(
                            tag_map.items(),
                            key=lambda t: len(t[0]),
                        )

                    # If it's CHOICES, we can't know what type it is.
                    # Guess either int or string, if we can convert.
                    if value.type is ValueTypes.CHOICES:
                        print('{}.{} uses CHOICES type, '
                              'provide ENGINE '
                              'tag!'.format(ent.classname, key))
                        if isinstance(value, KeyValues):
                            assert value.val_list is not None
                            try:
                                for choice_val, name, tag in value.val_list:
                                    int(choice_val)
                            except ValueError:
                                # Not all are ints, it's a string.
                                value.type = ValueTypes.STRING
                            else:
                                value.type = ValueTypes.INT
                            value.val_list = None

                    # Check if this is a shared property among all ents,
                    # and if so skip exporting.
                    if ent.classname != BASE_ENTITY:
                        base_value: Union[KeyValues, IODef]
                        try:
                            [base_value] = base_cat[key].values()
                        except KeyError:
                            pass
                        except ValueError:
                            raise ValueError(
                                f'Base Entity {attr_name[:-1]} "{key}" '
                                f'has multiple tags: {list(base_cat[key].keys())}'
                            )
                        else:
                            if base_value.type is ValueTypes.CHOICES:
                                print(f'Base Entity {attr_name[:-1]} '
                                      f'"{key}"  is a choices type!')
                            elif base_value.type is value.type:
                                del category[key]
                                continue
                            elif attr_name == 'keyvalues' and key == 'model':
                                # This can be sprite or model.
                                pass
                            elif base_value.type is ValueTypes.FLOAT and value.type is ValueTypes.INT:
                                # Just constraining it down to a whole number.
                                pass
                            else:
                                print(
                                    f'{ent.classname}.{key}: {value.type} != base {base_value.type}'
                                )

                    # Blank this, it's not that useful.
                    value.desc = ''
                    category[key] = {tags_empty: value}

        # Add in the base entity definition, and clear it out.
        fgd.entities[BASE_ENTITY.casefold()] = base_entity_def
        base_entity_def.desc = ''
        base_entity_def.helpers = []
        # Strip out all the tags.
        for cat in [
                base_entity_def.inputs, base_entity_def.outputs,
                base_entity_def.keyvalues
        ]:
            for key, tag_map in cat.items():
                [value] = tag_map.values()
                cat[key] = {tags_empty: value}
                if value.type is ValueTypes.CHOICES:
                    raise ValueError('Choices key in CBaseEntity!')
    else:
        print('Culling incompatible entities...')

        ents = list(fgd.entities.values())
        fgd.entities.clear()

        for ent in ents:
            applies_to = get_appliesto(ent)
            if match_tags(tags, applies_to):
                fgd.entities[ent.classname] = ent
                ent.strip_tags(tags)

            # Remove bases that don't apply.
            for base in ent.bases[:]:
                if not match_tags(tags, get_appliesto(base)):
                    ent.bases.remove(base)

    if not engine_mode:
        for poly_tag, polyfill in POLYFILLS:
            if not poly_tag or poly_tag in tags:
                polyfill(fgd)

    print('Applying helpers to child entities and optimising...')
    for ent in fgd.entities.values():
        # Merge them together.
        helpers = []
        for base in ent.bases:
            helpers.extend(base.helpers)
        helpers.extend(ent.helpers)

        # Then optimise this list.
        ent.helpers.clear()
        for helper in helpers:
            if helper in ent.helpers:  # No duplicates
                continue
            # Strip applies-to helper.
            if isinstance(helper, HelperExtAppliesTo):
                continue

            # For each, check if it makes earlier ones obsolete.
            overrides = helper.overrides()
            if overrides:
                ent.helpers[:] = [
                    helper for helper in ent.helpers
                    if helper.TYPE not in overrides
                ]

            # But it itself should be added to the end regardless.
            ent.helpers.append(helper)

    print('Culling unused bases...')
    used_bases = set()  # type: Set[EntityDef]
    # We only want to keep bases that provide keyvalues or additional bases.
    # We've merged the helpers in.
    for ent in fgd.entities.values():
        if ent.type is not EntityTypes.BASE:
            for base in ent.iter_bases():
                if base.type is EntityTypes.BASE and (base.keyvalues
                                                      or base.inputs
                                                      or base.outputs
                                                      or base.bases):
                    used_bases.add(base)

    for classname, ent in list(fgd.entities.items()):
        if ent.type is EntityTypes.BASE:
            if ent not in used_bases:
                del fgd.entities[classname]
                continue
            else:
                # Helpers aren't inherited, so this isn't useful anymore.
                ent.helpers.clear()
        # Cull all base classes we don't use.
        # Ents that inherit from each other always need to exist.
        ent.bases = [
            base for base in ent.bases
            if base.type is not EntityTypes.BASE or base in used_bases
        ]

    print('Merging in material exclusions...')
    for mat_tags, materials in fgd.tagged_mat_exclusions.items():
        if match_tags(tags, mat_tags):
            fgd.mat_exclusions |= materials
    fgd.tagged_mat_exclusions.clear()

    print('Culling visgroups...')
    # Cull visgroups that no longer exist for us.
    valid_ents = {
        ent.classname.casefold()
        for ent in fgd.entities.values() if ent.type is not EntityTypes.BASE
    }
    for key, visgroup in list(fgd.auto_visgroups.items()):
        visgroup.ents.intersection_update(valid_ents)
        if not visgroup.ents:
            del fgd.auto_visgroups[key]

    print('Exporting...')

    if as_binary:
        with open(output_path, 'wb') as bin_f, LZMAFile(bin_f, 'w') as comp:
            fgd.serialise(comp)
    else:
        with open(output_path, 'w', encoding='iso-8859-1') as txt_f:
            fgd.export(txt_f)
            # BEE2 compatibility, don't make it run.
            if 'P2' in tags:
                txt_f.write('\n// BEE 2 EDIT FLAG = 0 \n')
Example #30
 def write_to_file(self, file, compressed = True):
     if compressed:
         with LZMAFile(file, "wb") as lfile:
             self.serialize_to_bitstream(lfile)
     else:
         self.serialize_to_bitstream(file)
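Taken together with Example #7's read_from_file, this gives a compressed round trip; a hedged sketch (tree, HashTree, and hash_size stand in for values from the surrounding code):

tree.write_to_file("tree.bin.xz", compressed=True)
restored = HashTree.read_from_file("tree.bin.xz", hash_size, compressed=True)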