def parse_xml(site: Site, gramps_file_path: str) -> None:
    """Parse a Gramps family tree file into the site's ancestry.

    Accepts a *.gpkg tar archive, a gzipped *.gramps XML file, or plain XML,
    trying each format in turn.

    :param site: the site whose ancestry is populated by ``_parse_tree``.
    :param gramps_file_path: the path to the Gramps file to parse.
    """
    # Cache per source file, keyed on a hash of its path.
    cache_directory_path = path.join(
        site.configuration.cache_directory_path, Gramps.name(),
        hashlib.md5(gramps_file_path.encode('utf-8')).hexdigest())
    with suppress(FileExistsError):
        makedirs(cache_directory_path)
    logger = logging.getLogger()
    # Lazy %-style arguments: the message is only formatted if INFO is enabled.
    logger.info('Parsing %s...', gramps_file_path)
    try:
        # Close the gzip handle in all branches (it previously leaked).
        with gzip.open(gramps_file_path) as gramps_file:
            try:
                # Treat the file as a tar archive (*.gpkg) with media and a gzipped XML file (./data.gz/data).
                tarfile.open(fileobj=gramps_file).extractall(cache_directory_path)
                gramps_file_path = path.join(cache_directory_path, 'data.gramps')
                _parse_tree(site.ancestry, etree.parse(gramps_file_path), cache_directory_path)
            except tarfile.ReadError:
                # Treat the file as a gzipped XML file (*.gramps).
                _parse_tree(site.ancestry, etree.parse(gramps_file), rootname(gramps_file_path))
    except OSError:
        # Treat the file as plain XML (*.gramps).
        _parse_tree(site.ancestry, etree.parse(gramps_file_path), rootname(gramps_file_path))
def __init__(self, session: aiohttp.ClientSession, cache_directory_path: str, ttl: int = 86400):
    """Create a retriever that caches Wikipedia API responses on disk.

    :param session: the HTTP client session used for requests.
    :param cache_directory_path: base cache directory; a ``wikipedia``
        subdirectory is created inside it.
    :param ttl: cache time-to-live in seconds (defaults to one day).
    """
    wikipedia_cache_path = join(cache_directory_path, 'wikipedia')
    self._cache_directory_path = wikipedia_cache_path
    makedirs(wikipedia_cache_path)
    self._ttl = ttl
    self._session = session
async def generate_configuration_file(destination_file_path: str, jinja2_environment: Environment, **kwargs) -> None:
    """Render the bundled nginx configuration template to a file.

    :param destination_file_path: where the rendered configuration is written;
        missing parent directories are created.
    :param jinja2_environment: the Jinja2 environment used for rendering.
    :param kwargs: the template's rendering context.
    """
    template_path = path.join(path.dirname(__file__), 'assets', 'nginx.conf.j2')
    # Load from the filesystem root so the absolute template path resolves.
    loader = FileSystemLoader('/')
    template = loader.load(jinja2_environment, template_path, jinja2_environment.globals)
    makedirs(path.dirname(destination_file_path))
    with open(destination_file_path, 'w') as f:
        f.write(await template.render_async(kwargs))
def _render_config(self) -> None:
    """Write the nginx configuration and Dockerfile into the site's output directory."""
    nginx_directory_path = os.path.join(self._site.configuration.output_directory_path, 'nginx')
    makedirs(nginx_directory_path)
    # Copy the nginx configuration template and render it in place.
    file_name = 'nginx.conf.j2'
    destination_file_path = os.path.join(nginx_directory_path, file_name)
    self._site.resources.copy2(file_name, destination_file_path)
    render_file(destination_file_path, create_environment(self._site))
    # Copy the Dockerfile alongside the configuration.
    copyfile(os.path.join(DOCKER_PATH, 'Dockerfile'),
             os.path.join(nginx_directory_path, 'Dockerfile'))
def one(self, link: Link) -> Optional[Entry]:
    """Retrieve the Wikipedia entry behind *link*, or ``None``.

    Returns ``None`` when the link is not an http(s) Wikipedia article URL,
    or when no content could be obtained from either the cache or the API.

    :param link: the link whose URI is inspected.
    :return: the entry's URI, title and extract, or ``None``.
    """
    parts = urlparse(link.uri)
    if parts.scheme not in ['http', 'https']:
        return None
    if not re.fullmatch(r'^[a-z]+\.wikipedia\.org$', parts.netloc, re.IGNORECASE):
        return None
    if not re.fullmatch(r'^/wiki/.+$', parts.path, re.IGNORECASE):
        return None
    language_code, domain, _ = parts.netloc.split('.')
    # Strip the leading '/wiki/' to get the article title.
    title = parts.path[6:]
    uri = 'https://%s.wikipedia.org/w/api.php?action=query&titles=%s&prop=extracts&exintro&format=json&formatversion=2' % (
        language_code, title)
    # NOTE(review): self._cache_directory_path already ends in 'wikipedia'
    # (see __init__), so this nests 'wikipedia/wikipedia' — confirm intended.
    cache_directory_path = join(self._cache_directory_path, 'wikipedia')
    makedirs(cache_directory_path)
    cache_file_path = join(cache_directory_path, hashlib.md5(uri.encode('utf-8')).hexdigest())
    response_data = None
    # Serve from the cache while its entry is within the TTL.
    try:
        if getmtime(cache_file_path) + self._ttl > time():
            with open(cache_file_path) as f:
                response_data = load(f)
    except FileNotFoundError:
        pass
    if response_data is None:
        try:
            response = requests.get(uri)
            response_data = response.json()
            with open(cache_file_path, 'w') as f:
                f.write(response.text)
        except (RequestException, ValueError) as e:
            logger = logging.getLogger()
            # logger.warn() is deprecated (removed in Python 3.13); use warning()
            # with lazy %-style arguments.
            logger.warning('Could not retrieve content from Wikipedia at %s: %s', uri, e)
    if response_data is None:
        # The live request failed: fall back to a stale cache entry, if any.
        try:
            with open(cache_file_path) as f:
                response_data = load(f)
        except FileNotFoundError:
            pass
    if response_data is None:
        return None
    page = response_data['query']['pages'][0]
    return Entry(link.uri, page['title'], page['extract'])
def _filter_image(site: Site, file: File, width: Optional[int] = None, height: Optional[int] = None) -> str:
    """Scale an image file for the web and return its public URL path.

    At least one of *width*/*height* must be given; the missing dimension is
    derived from the image's aspect ratio. Scaled copies are cached under the
    site's cache directory and hard-linked into the www output directory.

    :raises ValueError: if neither width nor height is given.
    """
    if width is None and height is None:
        raise ValueError('At least the width or height must be given.')
    with Image.open(file.path) as image:
        # Never upscale: clamp the requested dimensions to the source image's.
        if width is not None:
            width = min(width, image.width)
        if height is not None:
            height = min(height, image.height)
        if width is None:
            # Height-only resize.
            size = height
            suffix = '-x%d'
            convert = resizeimage.resize_height
        elif height is None:
            # Width-only resize.
            size = width
            suffix = '%dx-'
            convert = resizeimage.resize_width
        else:
            # Both dimensions given: cover the exact size.
            size = (width, height)
            suffix = '%dx%d'
            convert = resizeimage.resize_cover
        file_directory_path = os.path.join(
            site.configuration.www_directory_path, 'file')
        destination_name = '%s-%s.%s' % (file.id, suffix % size, file.extension)
        destination_path = '/file/%s' % destination_name
        # The cache key includes a content hash, so edits to the source file
        # invalidate previously cached conversions.
        cache_directory_path = join(site.configuration.cache_directory_path, 'image')
        cache_file_path = join(
            cache_directory_path, '%s-%s' % (hashfile(file.path), destination_name))
        output_file_path = join(file_directory_path, destination_name)
        try:
            # Fast path: hard-link an existing cached conversion into place.
            os.link(cache_file_path, output_file_path)
        except FileExistsError:
            # The output already exists; nothing to do.
            pass
        except FileNotFoundError:
            # No cached conversion. If the output somehow already exists,
            # reuse it; otherwise convert, cache, and link into place.
            if exists(output_file_path):
                return destination_path
            makedirs(cache_directory_path)
            convert(image, size).save(cache_file_path)
            makedirs(file_directory_path)
            os.link(cache_file_path, output_file_path)
    return destination_path
async def _generate_config(self, event: PostGenerateEvent) -> None:
    """Emit the nginx configuration and Dockerfile after site generation."""
    nginx_directory_path = os.path.join(
        self._site.configuration.output_directory_path, 'nginx')
    makedirs(nginx_directory_path)
    # Copy the nginx configuration template and render it in place.
    file_name = 'nginx.conf.j2'
    destination_file_path = os.path.join(nginx_directory_path, file_name)
    await self._site.assets.copy2(file_name, destination_file_path)
    await self._site.renderer.render_file(destination_file_path)
    # Copy the Dockerfile alongside the configuration.
    copyfile(os.path.join(DOCKER_PATH, 'Dockerfile'),
             os.path.join(nginx_directory_path, 'Dockerfile'))
async def generate_configuration_file(destination_file_path: str, jinja2_environment: Environment, **kwargs) -> None:
    """Render the bundled nginx configuration template to a file.

    :param destination_file_path: where the rendered configuration is written;
        missing parent directories are created.
    :param jinja2_environment: the Jinja2 environment used for rendering.
    :param kwargs: the template's rendering context.
    """
    root_path = rootname(__file__)
    template_file_path = path.join(path.dirname(__file__), 'assets', 'nginx.conf.j2')
    # Express the template location relative to the filesystem root, joined
    # with forward slashes as the Jinja2 loader expects.
    configuration_file_template_name = '/'.join(
        Path(path.relpath(template_file_path, root_path)).parts)
    template = FileSystemLoader(root_path).load(
        jinja2_environment, configuration_file_template_name, jinja2_environment.globals)
    makedirs(path.dirname(destination_file_path))
    with open(destination_file_path, 'w') as f:
        f.write(template.render(kwargs))
def _filter_file(site: Site, file: File) -> str:
    """Publish *file* into the site's ``/file/`` directory and return its URL path."""
    destination_name = '%s.%s' % (file.id, file.extension)
    destination_path = '/file/%s' % destination_name
    file_directory_path = os.path.join(site.configuration.www_directory_path, 'file')
    output_destination_path = os.path.join(file_directory_path, destination_name)
    # Skip the copy if the file has already been published.
    if not exists(output_destination_path):
        makedirs(file_directory_path)
        copy2(file.path, output_destination_path)
    return destination_path
def extract_xml_file(gramps_file_path: str, cache_directory_path: str) -> str:
    """Extract the Gramps XML out of a Gramps file into the cache directory.

    Accepts either a *.gpkg tar archive (containing a gzipped ``data.gramps``)
    or a plain gzipped *.gramps XML file.

    :param gramps_file_path: the path to the Gramps file.
    :param cache_directory_path: where the extracted ``data.xml`` is written;
        created if missing.
    :return: the path to the extracted XML file.
    """
    try:
        makedirs(cache_directory_path)
    except FileExistsError:
        pass
    xml_file_path = join(cache_directory_path, 'data.xml')
    logger = logging.getLogger()
    # Lazy %-style arguments: the message is only formatted if INFO is enabled.
    logger.info('Extracting %s...', xml_file_path)
    # Context managers close both file handles (they previously leaked).
    with gzip.open(gramps_file_path) as ungzipped_outer_file, \
            open(xml_file_path, 'wb') as xml_file:
        try:
            # *.gpkg: the gzipped outer file is a tar archive containing media
            # plus a gzipped XML file named data.gramps.
            tarfile.open(
                fileobj=ungzipped_outer_file).extractall(cache_directory_path)
            inner_gramps_file_path = join(cache_directory_path, 'data.gramps')
            with gzip.open(inner_gramps_file_path) as inner_file:
                xml_file.write(inner_file.read())
        except tarfile.ReadError:
            # *.gramps: the gzipped outer file is the XML itself. tarfile.open
            # restores the stream position on failure, so read() gets it all.
            xml_file.write(ungzipped_outer_file.read())
    return xml_file_path
def _execute_filter_image(image: Image, file_path: str, cache_directory_path: str, destination_directory_path: str, destination_name: str, width: int, height: int) -> None:
    """Place a scaled copy of *image* at the destination, converting only on cache miss."""
    makedirs(destination_directory_path)
    cache_file_path = join(cache_directory_path,
                           '%s-%s' % (hashfile(file_path), destination_name))
    destination_file_path = join(destination_directory_path, destination_name)
    try:
        # Fast path: reuse a previously cached conversion.
        link_or_copy(cache_file_path, destination_file_path)
    except FileNotFoundError:
        # Cache miss: produce the scaled image, cache it, then place it.
        makedirs(cache_directory_path)
        with image:
            # Clamp to the source dimensions so images are never upscaled.
            if width is not None:
                width = min(width, image.width)
            if height is not None:
                height = min(height, image.height)
            # Pick the resize strategy from which dimensions were supplied.
            if width is None:
                size, converter = height, resizeimage.resize_height
            elif height is None:
                size, converter = width, resizeimage.resize_width
            else:
                size, converter = (width, height), resizeimage.resize_cover
            converter(image, size).save(cache_file_path)
        makedirs(destination_directory_path)
        link_or_copy(cache_file_path, destination_file_path)
def _do_filter_image(file_path: str, cache_directory_path: str, destination_directory_path: str, destination_name: str, convert: Callable, size: Tuple[int, int]) -> None:
    """Hard-link a cached scaled image into the destination, converting on cache miss."""
    makedirs(destination_directory_path)
    cached_path = join(cache_directory_path,
                       '%s-%s' % (hashfile(file_path), destination_name))
    target_path = join(destination_directory_path, destination_name)
    try:
        os.link(cached_path, target_path)
    except FileNotFoundError:
        # Cache miss: produce the scaled image, then link it into place.
        makedirs(cache_directory_path)
        with Image.open(file_path) as image:
            convert(image, size).save(cached_path)
        makedirs(destination_directory_path)
        os.link(cached_path, target_path)
async def generate_dockerfile_file(destination_file_path: str) -> None:
    """Copy the bundled Dockerfile asset to *destination_file_path*.

    Missing parent directories of the destination are created.
    """
    source_file_path = path.join(
        path.dirname(__file__), 'assets', 'docker', 'Dockerfile')
    makedirs(path.dirname(destination_file_path))
    copyfile(source_file_path, destination_file_path)
def _create_file(path: str) -> object:
    """Open *path* for writing, creating any missing parent directories.

    The caller is responsible for closing the returned file object.
    """
    # NOTE(review): assumes makedirs tolerates pre-existing directories —
    # confirm it is the project's exist_ok wrapper rather than os.makedirs.
    makedirs(os.path.dirname(path))
    return open(path, 'w')
def _do_filter_file(file_path: str, destination_directory_path: str, destination_name: str) -> None:
    """Publish *file_path* into the destination directory under *destination_name*."""
    makedirs(destination_directory_path)
    link_or_copy(file_path,
                 os.path.join(destination_directory_path, destination_name))
def _generate_openapi(www_directory_path: str, app: App) -> None:
    """Write the app's OpenAPI specification to ``<www>/api/index.json``."""
    api_directory_path = join(www_directory_path, 'api')
    makedirs(api_directory_path)
    specification = build_specification(app)
    with open(join(api_directory_path, 'index.json'), 'w') as f:
        dump(specification, f)
def __init__(self, cache_directory_path: str, ttl: int = 86400):
    """Create a retriever that caches Wikipedia responses on disk.

    :param cache_directory_path: base cache directory; a ``wikipedia``
        subdirectory is created inside it.
    :param ttl: cache time-to-live in seconds (defaults to one day).
    """
    wikipedia_cache_path = join(cache_directory_path, 'wikipedia')
    self._cache_directory_path = wikipedia_cache_path
    makedirs(wikipedia_cache_path)
    self._ttl = ttl