def _get_checkpoint_dir():
    dirs = AppDirs(appname="nimare", appauthor="neurostuff", version="1.0")
    checkpoint_dir = os.path.join(dirs.user_data_dir, "ohbm2018_model")
    if not os.path.exists(checkpoint_dir):
        print("Downloading the model (this is a one-off operation)... ")
        url = "https://zenodo.org/record/1257721/files/ohbm2018_model.tar.xz?download=1"
        # Streaming, so we can iterate over the response.
        r = requests.get(url, stream=True)
        f = BytesIO()

        # Total size in bytes.
        total_size = int(r.headers.get('content-length', 0))
        block_size = 1024 * 1024
        wrote = 0
        for data in tqdm(r.iter_content(block_size),
                         total=math.ceil(total_size / block_size),  # true division so the block count rounds up
                         unit='MB', unit_scale=True):
            wrote = wrote + len(data)
            f.write(data)
        if total_size != 0 and wrote != total_size:
            raise Exception("Download interrupted")

        f.seek(0)
        print("Uncompressing the model to %s..." % checkpoint_dir)
        tarfile = TarFile(fileobj=LZMAFile(f), mode="r")
        tarfile.extractall(dirs.user_data_dir)
    return checkpoint_dir

def test_wikipedia_export():
    @xml_handle_element("mediawiki", "page", "revision")
    class Revision:
        def __init__(self):
            self.author = None
            self.date = None

        @xml_handle_text("contributor", "username")
        def handle_author(self, node):
            self.author = node.text

        @xml_handle_text("timestamp")
        def handle_date(self, node):
            self.date = datetime.strptime(node.text, "%Y-%m-%dT%H:%M:%SZ")

    with LZMAFile(Path(__file__).parent / "wikipedia_python_export.xml.xz") as stream:
        items = list(Parser(stream).iter_from(Revision))

    assert len(items) == 1000
    assert all(isinstance(item, Revision) for item in items)

    revision = items[-1]
    assert revision.author == "Lulu of the Lotus-Eaters"
    assert revision.date.year == 2006
    assert revision.date.month == 4
    assert revision.date.day == 14
    assert revision.date.hour == 15
    assert revision.date.minute == 58

def __load_all_messages(self, author):
    """
    Note: If the quotes file doesn't exist (can happen) this will throw.
    """
    with LZMAFile(self.__quotes_file_name(author), 'r') as f:
        lines = f.read().decode('utf-8').split('\n')
    return [self.__remove_mentions(self.__unescape_message(line)) for line in lines]

async def heh_reload(self, message, args):
    log_dir = join(self.local_data_dir, 'log')
    files = [join(log_dir, f) for f in listdir(log_dir) if isfile(join(log_dir, f))]
    self.db = {'users': dict()}

    files_processed = 0
    for i, f in enumerate(sorted(files)):
        match = re.match('^.*/chanlog-([0-9]+-[0-9]+-[0-9]+).txt.xz$', f)
        if match is None:
            continue
        print(f)

        try:
            for line in LZMAFile(f, 'r'):
                # parse the message into its components (author, timestamps, channel, etc.)
                m = Message(line.decode('utf-8'))
                self.__update_db(m.author, m.author_id, m.message)
        except EOFError:
            # The latest log file may be open
            pass

        # may take a while, yield every so often
        files_processed += 1
        if files_processed % 10 == 0:
            await asyncio.sleep(0)

    self.__save_db()
    await self.client.send_message(message.channel, 'Done!')

def wrap_fp(fp):
    if suffix == ".gz":
        fp = GzipFile(fileobj=fp, mode=mode)
    elif suffix == ".bz2":
        try:
            fp = BZ2File(fp, mode=mode)
        except TypeError:
            if sys.version_info < (3, 0, 0):
                raise NotImplementedError(
                    "built-in BZ2File is partially broken in python 2, install bz2file from pypi or use a compression setting other than 'bz2'"
                )
            else:
                raise
    elif suffix == ".xz":
        fp = LZMAFile(fp, mode=mode)

    if (suffix or sys.version_info < (3, )) and "b" not in mode:
        # If mode is not binary (and we expect to be able to write() str
        # values, not bytes), we need to create an additional encoding
        # wrapper. That encoder can probably use UTF-8 without any need for
        # additional configuration.
        if "r" in mode and "w" in mode:
            fp = StreamReaderWriter(fp, codecs.getreader("utf-8"),
                                    codecs.getwriter("utf-8"))
        elif "w" in mode:
            fp = codecs.getwriter("utf-8")(fp)
        elif suffix:
            fp = codecs.getreader("utf-8")(fp)

    fp.realname = filename
    return fp

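# A minimal, self-contained sketch of the str-to-bytes wrapping used above,
# stdlib only. The filename "log.txt.xz" is a hypothetical example, not taken
# from the original snippet.
import codecs
from lzma import LZMAFile

with LZMAFile("log.txt.xz", mode="wb") as raw:
    # codecs.getwriter returns a StreamWriter class; instantiating it wraps
    # the binary LZMAFile so that plain str values are encoded on write().
    text_fp = codecs.getwriter("utf-8")(raw)
    text_fp.write("plain str values, encoded to UTF-8 and then compressed\n")
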
def buildhtml(
        trace, layout,
        template=join(dirname(util.__file__), "libs/sankey.js.xz"),
        outpath=join(gettempdir(), "temp-plot.html"),
        updatejs=False):
    js = join(dirname(outpath), "sankey.js")
    if updatejs or not os.path.exists(js):
        with LZMAFile(template, 'rb') as fp, open(js, "wb") as wp:
            wp.write(fp.read())

    html = tmpl.format(
        trace=json.dumps(trace),
        layout=json.dumps(layout)
    )
    with open(outpath, "w") as fp:
        fp.write(html)

    if os.name == "nt":
        code, dat = getstatusoutput("start " + outpath)
        if code != 0:
            raise RuntimeError(dat)
    else:
        print("output: " + outpath)

def read_from_file(file, hash_size, compressed=True):
    if compressed:
        with LZMAFile(file, "rb") as lfile:
            tree = HashTree.deserialize_from_bitstream(lfile, hash_size)
    else:
        # read directly from the (uncompressed) file object that was passed in
        tree = HashTree.deserialize_from_bitstream(file, hash_size)
    return tree

def load_fgd() -> FGD:
    """Extract the local copy of FGD data.

    This allows the analysis to not depend on local files.
    """
    from lzma import LZMAFile
    with LZMAFile(open_binary(srctools, 'fgd.lzma')) as f:
        return FGD.unserialise(f)

def generate_distfile(self):
    hexsha = self._repo.head.commit.hexsha
    filename = poudriere.DISTFILES / '{}.txz'.format(hexsha)
    if not filename.exists():
        with LZMAFile(filename, 'w', preset=9, format=FORMAT_XZ, check=CHECK_SHA256) as fp:
            self._repo.archive(fp, hexsha)
    return filename

def __init__(self, fname, verbose=False, manager=None):
    if os.path.exists(fname):
        self.fh = CompressedFile(fname, "rb")
        self.header_size = struct.unpack("<l", self.fh.read(4))[0]
        header_bytes = self.fh.read(self.header_size)
        self.header = pickle.loads(header_bytes)
    else:
        # no need to define self.fh or self.header_size because they will
        # never be used, as no item will be found in the empty header
        self.header = {}
    self.verbose = verbose
    self.manager = manager

def _readControl(self):
    ar = arpy.Archive(self.filename)
    ar.read_all_headers()

    if b'control.tar.xz' in ar.archived_files:
        # NOTE: this requires https://github.com/viraptor/arpy/pull/5
        tar = LZMAFile(filename=ar.archived_files[b'control.tar.xz'])
    elif b'control.tar.gz' in ar.archived_files:
        tar = GzipFile(fileobj=ar.archived_files[b'control.tar.gz'])
    else:
        raise ValueError('Unable to find control file')

    raw = TarFile(fileobj=tar)
    control = raw.extractfile('./control').read()

    raw.close()
    tar.close()
    ar.close()

    return control

class LZMAPlugin:
    """
    Compresses received data using `lzma <https://en.wikipedia.org/wiki/Lempel–Ziv–Markov_chain_algorithm>`_.

    Accepted ``feed_options`` parameters:

    - `lzma_format`
    - `lzma_check`
    - `lzma_preset`
    - `lzma_filters`

    .. note::
        ``lzma_filters`` cannot be used in pypy version 7.3.1 and older.

    See :py:class:`lzma.LZMAFile` for more info about parameters.
    """

    def __init__(self, file: BinaryIO, feed_options: Dict[str, Any]) -> None:
        self.file = file
        self.feed_options = feed_options
        format = self.feed_options.get("lzma_format")
        check = self.feed_options.get("lzma_check", -1)
        preset = self.feed_options.get("lzma_preset")
        filters = self.feed_options.get("lzma_filters")
        self.lzmafile = LZMAFile(filename=self.file, mode="wb", format=format,
                                 check=check, preset=preset, filters=filters)

    def write(self, data: bytes) -> int:
        return self.lzmafile.write(data)

    def close(self) -> None:
        self.lzmafile.close()
        self.file.close()

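# A minimal usage sketch for the plugin above, assuming it is instantiated
# directly. The path "items.jsonl.xz" and the option values are illustrative,
# not taken from the original snippet.
import lzma

options = {"lzma_format": lzma.FORMAT_XZ, "lzma_preset": 6}
with open("items.jsonl.xz", "wb") as fh:
    plugin = LZMAPlugin(fh, options)
    plugin.write(b'{"name": "example"}\n')
    plugin.close()  # closes both the LZMAFile wrapper and the underlying file
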
def new_state_sender(filename=None):
    from lzma import LZMAFile

    socket_ = None
    if filename is not None:
        fsfile_ = LZMAFile(filename, 'wb')
    else:
        fsfile_ = _NullFile()

    file_ = None
    try:
        socket_ = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        socket_.connect(os.environ['STBT_TRACING_SOCKET'])
        file_ = _SocketAndFileWriter(socket_, fsfile_)
    except (KeyError, socket.error):
        file_ = fsfile_
    return StateSender(file_)

def download_frida_server(url, version, fname):
    """Download frida-server binary."""
    try:
        download_dir = Path(settings.DWD_DIR)
        logger.info('Downloading binary %s', fname)
        dwd_loc = download_dir / fname
        with requests.get(url, stream=True) as r:
            with LZMAFile(r.raw) as f:
                with open(dwd_loc, 'wb') as flip:
                    copyfileobj(f, flip)
        clean_up_old_binaries(download_dir, version)
        return True
    except Exception:
        logger.exception('[ERROR] Downloading Frida Server Binary')
    return False

def _read_bytes_xz(filename: str) -> BufferedIOBase:
    '''
    Open and return a filehandle suitable for reading the binary LZMA-archived
    file with the passed filename.

    This private function is intended to be called *only* by the public
    :func:`reading_bytes` function.
    '''

    # This optional stdlib module is guaranteed to exist and hence be safely
    # importable here, due to the above die_unless_filetype() call.
    from lzma import LZMAFile

    # Open and return a filehandle suitable for reading this file.
    return LZMAFile(filename, mode='rb')

def GenerateArchive(self, nodeTemplate):
    """
    Given an xz compressed archive, read off its contents, encapsulating each
    record in a node object for further processing upstream.

    @param Node nodeTemplate - Node object definition, used to encapsulate records
    @return None
    """
    fd = LZMAFile(self.process)

    # Yank Header Information
    buf = next(fd)

    # Begin processing
    while True:
        try:
            line = next(fd)
        except StopIteration as FinishedProcessing:
            break
        yield nodeTemplate(url=line.strip())

    fd.close()

def download_peaks2maps_model(data_dir=None, overwrite=False, verbose=1):
    """
    Download the trained Peaks2Maps model from OHBM 2018.
    """
    url = "https://zenodo.org/record/1257721/files/ohbm2018_model.tar.xz?download=1"

    temp_dataset_name = 'peaks2maps_model_ohbm2018__temp'
    temp_data_dir = _get_dataset_dir(temp_dataset_name, data_dir=data_dir, verbose=verbose)

    dataset_name = 'peaks2maps_model_ohbm2018'
    data_dir = temp_data_dir.replace(temp_dataset_name, dataset_name)

    desc_file = op.join(data_dir, 'description.txt')
    if op.isfile(desc_file) and overwrite is False:
        shutil.rmtree(temp_data_dir)
        return data_dir

    LGR.info('Downloading the model (this is a one-off operation)...')
    # Streaming, so we can iterate over the response.
    r = requests.get(url, stream=True)
    f = BytesIO()

    # Total size in bytes.
    total_size = int(r.headers.get('content-length', 0))
    block_size = 1024 * 1024
    wrote = 0
    for data in tqdm(r.iter_content(block_size),
                     total=math.ceil(total_size / block_size),  # true division so the block count rounds up
                     unit='MB', unit_scale=True):
        wrote = wrote + len(data)
        f.write(data)
    if total_size != 0 and wrote != total_size:
        raise Exception("Download interrupted")

    f.seek(0)
    LGR.info('Uncompressing the model to {}...'.format(temp_data_dir))
    tarfile = TarFile(fileobj=LZMAFile(f), mode="r")
    tarfile.extractall(temp_data_dir)

    os.rename(op.join(temp_data_dir, 'ohbm2018_model'), data_dir)
    shutil.rmtree(temp_data_dir)

    with open(desc_file, 'w') as fo:
        fo.write('The trained Peaks2Maps model from OHBM 2018.')

    if verbose > 0:
        print('\nDataset moved to {}\n'.format(data_dir))

    return data_dir

def new_state_sender(filename=None):
    try:
        from lzma import LZMAFile
    except ImportError:
        from backports.lzma import LZMAFile  # pylint:disable=E0611,F0401

    socket_ = None
    if filename is not None:
        fsfile_ = LZMAFile(filename, 'wb')
    else:
        fsfile_ = _NullFile()

    file_ = None
    try:
        socket_ = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        socket_.connect(os.environ['STBT_TRACING_SOCKET'])
        file_ = _SocketAndFileWriter(socket_, fsfile_)
    except (KeyError, socket.error):
        file_ = fsfile_  # pylint:disable=redefined-variable-type
    return StateSender(file_)

def __init__(
    self,
    path: Path,
    *,
    encoding: str,
    warn_uncompressed: bool = True,
    progress_bar: bool = False,
    progress_bar_desc: Optional[str] = None,
):
    self.path = path

    self._fp = path.open("rb")
    self._fin: BinaryIO
    if path.suffix == ".gz":
        self._fin = cast(BinaryIO, GzipFile(fileobj=self._fp))
    elif path.suffix == ".bz2":
        self._fin = cast(BinaryIO, BZ2File(self._fp))
    elif path.suffix == ".xz":
        self._fin = cast(BinaryIO, LZMAFile(self._fp))
    elif path.suffix == ".zst":
        self._fin = cast(BinaryIO, ZstdDecompressor().stream_reader(self._fp))
    else:
        if warn_uncompressed:  # pragma: no cover
            _LOGGER.warning(
                "Could not detect compression type of file '{}' from its "
                "extension, treating as uncompressed file.",
                path,
            )
        self._fin = self._fp

    self._progress_bar: Optional[tqdm[None]] = None
    if progress_bar:
        self._progress_bar = tqdm(
            desc=progress_bar_desc or self.path.name,
            total=self.size(),
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            dynamic_ncols=True,
        )

    super().__init__(self._fin, encoding=encoding)

def _write_bytes_xz(filename: str, is_overwritable: bool) -> BufferedIOBase:
    '''
    Open and return a filehandle suitable for writing the binary LZMA-archived
    file with the passed filename.

    This private function is intended to be called *only* by the public
    :func:`writing_bytes` function.
    '''

    # Avoid circular import dependencies.
    from betse.util.io import iofiles

    # This optional stdlib module is guaranteed to exist and hence be safely
    # importable here, due to the above die_unless_filetype() call.
    from lzma import LZMAFile

    # Open and return a filehandle suitable for e(x)clusively writing this file.
    return LZMAFile(
        filename, mode=iofiles.get_mode_write_bytes(is_overwritable))

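# A minimal, self-contained round-trip sketch of what the two wrappers above
# ultimately do, using only the stdlib. The filename "payload.bin.xz" is a
# hypothetical example, not from the original module.
from lzma import LZMAFile

with LZMAFile("payload.bin.xz", mode="wb") as fh:
    fh.write(b"some binary payload")

with LZMAFile("payload.bin.xz", mode="rb") as fh:
    assert fh.read() == b"some binary payload"
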
def z_handler(self, dbfile):
    """
    If the database file is compressed, uncompresses it and returns
    the filename of the uncompressed file.

    @param dbfile: the name of the file
    @type dbfile: str

    @return: the name of the uncompressed file
    @rtype: str
    """
    (junk, ext) = os.path.splitext(dbfile)
    if ext == '.bz2':
        from bz2 import BZ2File
        zfd = BZ2File(dbfile)
    elif ext == '.gz':
        from gzip import GzipFile
        zfd = GzipFile(dbfile)
    elif ext == '.xz':
        from lzma import LZMAFile
        zfd = LZMAFile(dbfile)
    else:
        # not compressed (or something odd)
        return dbfile

    import tempfile
    (unzfd, unzname) = tempfile.mkstemp('.repoview')
    self.cleanup.append(unzname)
    # open in binary mode, since the decompressors yield bytes
    unzfd = open(unzname, 'wb')

    while True:
        data = zfd.read(16384)
        if not data:
            break
        unzfd.write(data)
    zfd.close()
    unzfd.close()

    return unzname

def __init__(self, path):
    if path.endswith('.gz'):
        self.fd = TarFile.gzopen(path)
    elif path.endswith('.xz'):
        self.fd = TarFile.open(fileobj=LZMAFile(path))
    else:
        raise Exception('Unsupported file type %s' % path)

    self.pkg_info = defaultdict(list)
    self.members = []

    # Extract most used information
    if self.parse_pkginfo():
        self.parse_contents()

    self.name = self.pkg_info.get('pkgname')
    self.desc = self.pkg_info.get('pkgdesc')[0]
    self.depends = self.pkg_info.get('depend') or []
    self.groups = self.pkg_info.get('group') or []

    if isinstance(self.name, (list, tuple)) and len(self.name) == 1:
        self.name = self.name[0]

class Log(glados.Module):
    def __init__(self, server_instance, full_name):
        super(Log, self).__init__(server_instance, full_name)

        self.log_path = os.path.join(self.local_data_dir, 'log')
        if not os.path.exists(self.log_path):
            os.makedirs(self.log_path)
        self.date = datetime.now().strftime('%Y-%m-%d')
        self.log_file = LZMAFile(
            os.path.join(self.log_path, 'chanlog-{}.txt.xz'.format(self.date)), 'a')

    def __open_new_log_if_necessary(self):
        date = datetime.now().strftime('%Y-%m-%d')
        if not self.date == date:
            self.log_file.close()
            self.date = date
            self.log_file = LZMAFile(
                os.path.join(self.log_path, 'chanlog-{}.txt.xz'.format(self.date)), 'a')

    @glados.Permissions.spamalot
    @glados.Module.rule('^.*$', ignorecommands=False)
    async def on_message(self, message, match):
        server_name = message.server.name if message.server else ''
        server_id = message.server.id if message.server else ''
        self.__open_new_log_if_necessary()
        info = u'[{0}] {1}({2}): #{3}: {4}({5}): {6}\n'.format(
            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            server_name, server_id,
            message.channel.name,
            message.author.name, message.author.id,
            message.clean_content)
        self.log_file.write(info.encode('utf-8'))
        self.log_file.flush()
        return ()

def action_export(
    dbase: Path,
    extra_db: Optional[Path],
    tags: FrozenSet[str],
    output_path: Path,
    as_binary: bool,
    engine_mode: bool,
) -> None:
    """Create an FGD file using the given tags."""

    if engine_mode:
        tags = frozenset({'ENGINE'})
    else:
        tags = expand_tags(tags)

    print('Tags expanded to: {}'.format(', '.join(tags)))

    fgd, base_entity_def = load_database(dbase, extra_db)

    if engine_mode:
        # In engine mode, we don't care about specific games.
        print('Collapsing bases...')
        fgd.collapse_bases()

        # Cache these constant sets.
        tags_empty = frozenset('')
        tags_not_engine = frozenset({'-ENGINE', '!ENGINE'})

        print('Merging tags...')
        for ent in fgd:
            # If it's set as not in engine, skip.
            if not tags_not_engine.isdisjoint(get_appliesto(ent)):
                continue
            # Strip applies-to helper and ordering helper.
            ent.helpers[:] = [
                helper for helper in ent.helpers
                if not helper.IS_EXTENSION
            ]
            # Force everything to inherit from CBaseEntity, since
            # we're then removing any KVs that are present on that.
            if ent.classname != BASE_ENTITY:
                ent.bases = [base_entity_def]

            value: Union[IODef, KeyValues]
            category: Dict[str, Dict[FrozenSet[str], Union[IODef, KeyValues]]]
            base_cat: Dict[str, Dict[FrozenSet[str], Union[IODef, KeyValues]]]
            for attr_name in ['inputs', 'outputs', 'keyvalues']:
                category = getattr(ent, attr_name)
                base_cat = getattr(base_entity_def, attr_name)
                # For each category, check for what value we want to keep.
                # If only one, we keep that.
                # If there's an "ENGINE" tag, that's specifically for us.
                # Otherwise, warn if there's a type conflict.
                # If the final value is choices, warn too (not really a type).
                for key, orig_tag_map in list(category.items()):
                    # Remake the map, excluding non-engine tags.
                    # If any are explicitly matching us, just use that
                    # directly.
                    tag_map = {}
                    for tags, value in orig_tag_map.items():
                        if 'ENGINE' in tags or '+ENGINE' in tags:
                            if value.type is ValueTypes.CHOICES:
                                raise ValueError(
                                    '{}.{}: Engine tags cannot be '
                                    'CHOICES!'.format(ent.classname, key))
                            # Use just this.
                            tag_map = {'': value}
                            break
                        elif '-ENGINE' not in tags and '!ENGINE' not in tags:
                            tag_map[tags] = value

                    if not tag_map:
                        # All were set as non-engine, so it's not present.
                        del category[key]
                        continue
                    elif len(tag_map) == 1:
                        # Only one type, that's the one for the engine.
                        [value] = tag_map.values()
                    else:
                        # More than one tag.
                        # IODef and KeyValues have a type attr.
                        types = {val.type for val in tag_map.values()}
                        if len(types) > 1:
                            print('{}.{} has multiple types! ({})'.format(
                                ent.classname, key,
                                ', '.join([typ.value for typ in types])))
                        # Pick the one with shortest tags arbitrarily.
                        _, value = min(
                            tag_map.items(),
                            key=lambda t: len(t[0]),
                        )

                    # If it's CHOICES, we can't know what type it is.
                    # Guess either int or string, if we can convert.
                    if value.type is ValueTypes.CHOICES:
                        print('{}.{} uses CHOICES type, '
                              'provide ENGINE '
                              'tag!'.format(ent.classname, key))
                        if isinstance(value, KeyValues):
                            assert value.val_list is not None
                            try:
                                for choice_val, name, tag in value.val_list:
                                    int(choice_val)
                            except ValueError:
                                # Not all are ints, it's a string.
                                value.type = ValueTypes.STRING
                            else:
                                value.type = ValueTypes.INT
                            value.val_list = None

                    # Check if this is a shared property among all ents,
                    # and if so skip exporting.
                    if ent.classname != BASE_ENTITY:
                        base_value: Union[KeyValues, IODef]
                        try:
                            [base_value] = base_cat[key].values()
                        except KeyError:
                            pass
                        except ValueError:
                            raise ValueError(
                                f'Base Entity {attr_name[:-1]} "{key}" '
                                f'has multiple tags: {list(base_cat[key].keys())}'
                            )
                        else:
                            if base_value.type is ValueTypes.CHOICES:
                                print(f'Base Entity {attr_name[:-1]} '
                                      f'"{key}" is a choices type!')
                            elif base_value.type is value.type:
                                del category[key]
                                continue
                            elif attr_name == 'keyvalues' and key == 'model':
                                # This can be sprite or model.
                                pass
                            elif base_value.type is ValueTypes.FLOAT and value.type is ValueTypes.INT:
                                # Just constraining it down to a whole number.
                                pass
                            else:
                                print(
                                    f'{ent.classname}.{key}: {value.type} != base {base_value.type}'
                                )

                    # Blank this, it's not that useful.
                    value.desc = ''

                    category[key] = {tags_empty: value}

        # Add in the base entity definition, and clear it out.
        fgd.entities[BASE_ENTITY.casefold()] = base_entity_def
        base_entity_def.desc = ''
        base_entity_def.helpers = []
        # Strip out all the tags.
        for cat in [
            base_entity_def.inputs,
            base_entity_def.outputs,
            base_entity_def.keyvalues,
        ]:
            for key, tag_map in cat.items():
                [value] = tag_map.values()
                cat[key] = {tags_empty: value}
                if value.type is ValueTypes.CHOICES:
                    raise ValueError('Choices key in CBaseEntity!')
    else:
        print('Culling incompatible entities...')

        ents = list(fgd.entities.values())
        fgd.entities.clear()

        for ent in ents:
            applies_to = get_appliesto(ent)
            if match_tags(tags, applies_to):
                fgd.entities[ent.classname] = ent
                ent.strip_tags(tags)

                # Remove bases that don't apply.
                for base in ent.bases[:]:
                    if not match_tags(tags, get_appliesto(base)):
                        ent.bases.remove(base)

    if not engine_mode:
        for poly_tag, polyfill in POLYFILLS:
            if not poly_tag or poly_tag in tags:
                polyfill(fgd)

    print('Applying helpers to child entities and optimising...')
    for ent in fgd.entities.values():
        # Merge them together.
        helpers = []
        for base in ent.bases:
            helpers.extend(base.helpers)
        helpers.extend(ent.helpers)

        # Then optimise this list.
        ent.helpers.clear()
        for helper in helpers:
            if helper in ent.helpers:  # No duplicates
                continue
            # Strip applies-to helper.
            if isinstance(helper, HelperExtAppliesTo):
                continue

            # For each, check if it makes earlier ones obsolete.
            overrides = helper.overrides()
            if overrides:
                ent.helpers[:] = [
                    helper for helper in ent.helpers
                    if helper.TYPE not in overrides
                ]

            # But it itself should be added to the end regardless.
            ent.helpers.append(helper)

    print('Culling unused bases...')
    used_bases = set()  # type: Set[EntityDef]
    # We only want to keep bases that provide keyvalues or additional bases.
    # We've merged the helpers in.
    for ent in fgd.entities.values():
        if ent.type is not EntityTypes.BASE:
            for base in ent.iter_bases():
                if base.type is EntityTypes.BASE and (
                    base.keyvalues or base.inputs or base.outputs or base.bases
                ):
                    used_bases.add(base)

    for classname, ent in list(fgd.entities.items()):
        if ent.type is EntityTypes.BASE:
            if ent not in used_bases:
                del fgd.entities[classname]
                continue
            else:
                # Helpers aren't inherited, so this isn't useful anymore.
                ent.helpers.clear()
        # Cull all base classes we don't use.
        # Ents that inherit from each other always need to exist.
        ent.bases = [
            base for base in ent.bases
            if base.type is not EntityTypes.BASE or base in used_bases
        ]

    print('Merging in material exclusions...')
    for mat_tags, materials in fgd.tagged_mat_exclusions.items():
        if match_tags(tags, mat_tags):
            fgd.mat_exclusions |= materials
    fgd.tagged_mat_exclusions.clear()

    print('Culling visgroups...')
    # Cull visgroups that no longer exist for us.
    valid_ents = {
        ent.classname.casefold()
        for ent in fgd.entities.values()
        if ent.type is not EntityTypes.BASE
    }
    for key, visgroup in list(fgd.auto_visgroups.items()):
        visgroup.ents.intersection_update(valid_ents)
        if not visgroup.ents:
            del fgd.auto_visgroups[key]

    print('Exporting...')

    if as_binary:
        with open(output_path, 'wb') as bin_f, LZMAFile(bin_f, 'w') as comp:
            fgd.serialise(comp)
    else:
        with open(output_path, 'w', encoding='iso-8859-1') as txt_f:
            fgd.export(txt_f)

            # BEE2 compatibility, don't make it run.
            if 'P2' in tags:
                txt_f.write('\n// BEE 2 EDIT FLAG = 0 \n')

failed_members = set()
for server_id in os.listdir("data"):
    if not os.path.isdir(os.path.join("data", server_id)):
        print("skipping file " + os.path.join("data", server_id))
        continue
    if server_id not in info:
        print("Server with ID {} was not found in dumpservers.json.xz file! Skipping...".format(server_id))
        continue
    log_dir = os.path.join("data", server_id, "log2")
    if not os.path.isdir(log_dir):
        print("Server \"{}\" has no logs! Skipping...".format(info[server_id]["name"]))
        continue

    for log_file_name in sorted(os.listdir(log_dir)):
        print("Processing file {} on server {}".format(log_file_name, info[server_id]["name"]))
        log_data = LZMAFile(os.path.join(log_dir, log_file_name), 'r').read().decode('utf-8')
        new_log_file = LZMAFile(os.path.join(log_dir, log_file_name), 'w')
        for line in log_data.split('\n'):
            if not line:
                continue
            m = Message(line)
            if int(m.author_id) == 0:
                for id, member in info[server_id]["members"].items():
                    if m.author == member["name"]:
                        m.author_id = id
                        break
                else:
                    failed_members.add(m.author)
            log_msg = u'[{0}] {1}({2}): {3}: {4}({5}): {6}\n'.format(

def _decompress(cls, stream):
    return LZMAFile(stream, mode="r")

async def reprocess_cache(self, message, matches):
    # Check if cache is up to date
    date = datetime.now().strftime('%Y-%m-%d')
    if self.cache is not None and self.cache['date'] == date:
        return ()

    # Get list of all channel log files
    files = [join(self.log_dir, f) for f in listdir(self.log_dir) if isfile(join(self.log_dir, f))]

    self.cache = dict()
    self.cache['date'] = datetime.now().strftime('%Y-%m-%d')
    authors = dict()
    total_days = dict()  # Keep track of how many days a user has existed for, so we can calculate averages

    # We don't want to process the last log file, because it doesn't contain a full day's worth of info
    #                                     vvvvv
    for i, f in enumerate(sorted(files)[:-1]):
        match = re.match('^.*/chanlog-([0-9]+-[0-9]+-[0-9]+).txt.xz$', f)
        if match is None:
            continue
        print(f)
        log_stamp = strptime(match.group(1), '%Y-%m-%d')

        # Update the total days counter of all users we've seen so far
        for author in total_days:
            total_days[author] += 1

        # Update cycle counters to the current day
        for k, v in authors.items():
            v['day_cycle_acc_day'].appendleft([0]*24)
            v['day_cycle_acc_week'].appendleft([0]*24)
            v['commands_acc'].appendleft(0)

        try:
            for line in LZMAFile(f, 'r'):
                # parse the message into its components (author, timestamps, channel, etc.)
                m = Message(line.decode('utf-8'))

                # create an entry in the top-level "authors" dict in the cache structure, if not already there
                if m.author_id not in authors:
                    authors[m.author_id] = new_author_dict(m.author)
                    authors[m.author_id]['userId'] = m.author_id
                    authors[m.author_id]['day_cycle_acc'] = [0]*24
                    authors[m.author_id]['day_cycle_acc_day'] = deque([[0]*24], maxlen=1)
                    authors[m.author_id]['day_cycle_acc_week'] = deque([[0]*24], maxlen=7)
                    authors[m.author_id]['commands_acc'] = deque([0], maxlen=7)
                    total_days[m.author_id] = 1
                a = authors[m.author_id]

                # keep track of the total message count
                a['messages_total'] += 1

                # See if message contains any commands
                command_count = len(get_commands_from_message(m.message))
                a['commands_total'] += command_count
                a['commands_acc'][0] += command_count

                # Accumulate message count cycles for later averaging
                a['day_cycle_acc'][int(m.stamp.tm_hour)] += 1
                a['day_cycle_acc_day'][0][int(m.stamp.tm_hour)] += 1
                a['day_cycle_acc_week'][0][int(m.stamp.tm_hour)] += 1

                # count messages per channel
                a['channels'][m.channel] = a['channels'].get(m.channel, 0) + 1

                # count how many messages the user makes for every day
                key = time.mktime(log_stamp)
                a['messages_per_day'][key] = a['messages_per_day'].get(key, 0) + 1
        except:
            continue

        # This process does take some time, so yield after processing every file
        await asyncio.sleep(0)

    server_stats = new_author_dict('Server')

    def sum_lists(a, b):
        return [float(sum(x)) for x in zip(*[a, b])]

    def add_dicts(a, b):
        return {x: a.get(x, 0) + b.get(x, 0) for x in set(a).union(b)}

    for author, a in authors.items():
        # Calculate average day cycle using the accumulated cycle
        for i, v in enumerate(a['day_cycle_acc']):
            a['day_cycle_avg'][i] = float(v / total_days[author])

        # There are 7 lists of day cycles that need to be added up, then divided by 7
        a['day_cycle_avg_week'] = [float(sum(x)/7.0) for x in zip(*a['day_cycle_acc_week'])]

        # Days are easier, just use the first (and only) item
        a['day_cycle_avg_day'] = [float(x) for x in a['day_cycle_acc_day'][0]]

        a['messages_last_week'] = int(sum(sum(x) for x in zip(*a['day_cycle_acc_week'])))
        a['commands_last_week'] = int(sum(a['commands_acc']))

        # Accumulate all of these stats into the server stats
        server_stats['messages_total'] += a['messages_total']
        server_stats['messages_last_week'] += a['messages_last_week']
        server_stats['commands_total'] += a['commands_total']
        server_stats['day_cycle_avg'] = sum_lists(server_stats['day_cycle_avg'], a['day_cycle_avg'])
        server_stats['day_cycle_avg_week'] = sum_lists(server_stats['day_cycle_avg_week'], a['day_cycle_avg_week'])
        server_stats['day_cycle_avg_day'] = sum_lists(server_stats['day_cycle_avg_day'], a['day_cycle_avg_day'])
        server_stats['channels'] = add_dicts(server_stats['channels'], a['channels'])
        server_stats['messages_per_day'] = add_dicts(server_stats['messages_per_day'], a['messages_per_day'])

        # Delete the temporary keys before saving
        del a['day_cycle_acc']
        del a['day_cycle_acc_day']
        del a['day_cycle_acc_week']
        del a['commands_acc']

    # Finally, save cache
    self.cache['server'] = server_stats
    self.cache['authors'] = authors
    save_json_compressed(self.cache_file, self.cache)
    return ()

"""Compile the files in the fgd/ folder into a binary blob.""" from srctools import FGD from srctools.filesys import RawFileSystem from lzma import LZMAFile fgd = FGD() with RawFileSystem('fgd/') as fs: for file in fs: fgd.parse_file(fs, file) with open('srctools/fgd.lzma', 'wb') as f: with LZMAFile(f, mode='w') as cf: fgd.serialise(cf)
def download_peaks2maps_model(data_dir=None, overwrite=False, verbose=1):
    """Download the trained Peaks2Maps model from OHBM 2018.

    .. versionadded:: 0.0.2

    Parameters
    ----------
    data_dir : None or str, optional
        Where to put the trained model.
        If None, then download to the automatic NiMARE data directory.
        Default is None.
    overwrite : bool, optional
        Whether to overwrite an existing model or not. Default is False.
    verbose : int, optional
        Verbosity level. Default is 1.

    Returns
    -------
    data_dir : str
        Path to folder containing model.
    """
    url = "https://zenodo.org/record/1257721/files/ohbm2018_model.tar.xz?download=1"

    temp_dataset_name = "peaks2maps_model_ohbm2018__temp"
    data_dir = _get_dataset_dir("", data_dir=data_dir, verbose=verbose)
    temp_data_dir = _get_dataset_dir(temp_dataset_name, data_dir=data_dir, verbose=verbose)

    dataset_name = "peaks2maps_model_ohbm2018"
    if dataset_name not in data_dir:  # allow data_dir to include model folder
        data_dir = temp_data_dir.replace(temp_dataset_name, dataset_name)

    desc_file = op.join(data_dir, "description.txt")
    if op.isfile(desc_file) and overwrite is False:
        shutil.rmtree(temp_data_dir)
        return data_dir

    LGR.info("Downloading the model (this is a one-off operation)...")
    # Streaming, so we can iterate over the response.
    r = requests.get(url, stream=True)
    f = BytesIO()

    # Total size in bytes.
    total_size = int(r.headers.get("content-length", 0))
    block_size = 1024 * 1024
    wrote = 0
    for data in tqdm(
        r.iter_content(block_size),
        total=np.ceil(total_size / block_size),  # true division so the block count rounds up
        unit="MB",
        unit_scale=True,
    ):
        wrote = wrote + len(data)
        f.write(data)
    if total_size != 0 and wrote != total_size:
        raise Exception("Download interrupted")

    f.seek(0)
    LGR.info(f"Uncompressing the model to {temp_data_dir}...")
    tf_file = tarfile.TarFile(fileobj=LZMAFile(f), mode="r")
    tf_file.extractall(temp_data_dir)

    os.rename(op.join(temp_data_dir, "ohbm2018_model"), data_dir)
    shutil.rmtree(temp_data_dir)

    with open(desc_file, "w") as fo:
        fo.write("The trained Peaks2Maps model from OHBM 2018.")

    if verbose > 0:
        print(f"\nDataset moved to {data_dir}\n")

    return data_dir

def write_to_file(self, file, compressed=True):
    if compressed:
        with LZMAFile(file, "wb") as lfile:
            self.serialize_to_bitstream(lfile)
    else:
        # write directly to the (uncompressed) file object that was passed in
        self.serialize_to_bitstream(file)

def __append_message(self, author, message):
    with LZMAFile(self.__quotes_file_name(author), 'a') as f:
        message = self.__escape_message(message) + '\n'
        f.write(message.encode('utf-8'))
