class BaseExporter(metaclass=abc.ABCMeta):
    """Abstract base for MFR exporters.

    Concrete subclasses implement :meth:`export`, which converts the file at
    ``source_file_path`` into ``format`` and writes it to ``output_file_path``.
    """

    def __init__(self, source_file_path, output_file_path, format):
        self.source_file_path = source_file_path
        self.output_file_path = output_file_path
        self.format = format

        # Per-request metrics; a subrecord is keyed by the extension name.
        self.exporter_metrics = MetricsRecord('exporter')
        module_name = self._get_module_name()
        if module_name:
            self.metrics = self.exporter_metrics.new_subrecord(module_name)

        self.exporter_metrics.merge({
            'class': module_name,
            'format': self.format,
            'source_path': str(self.source_file_path),
            'output_path': str(self.output_file_path),
            # 'error': 'error_t',
            # 'elapsed': 'elpased_t',
        })

    @abc.abstractmethod
    def export(self):
        pass

    def _get_module_name(self):
        # e.g. 'mfr.extensions.image.export' -> 'image'
        name = self.__module__
        name = name.replace('mfr.extensions.', '', 1)
        return name.replace('.export', '', 1)
class BaseExporter(metaclass=abc.ABCMeta):
    """Base class for exporters; subclasses provide ``export``."""

    def __init__(self, source_file_path, output_file_path, format):
        self.source_file_path = source_file_path
        self.output_file_path = output_file_path
        self.format = format
        self.exporter_metrics = MetricsRecord('exporter')
        if self._get_module_name():
            self.metrics = self.exporter_metrics.new_subrecord(
                self._get_module_name()
            )
        self.exporter_metrics.merge({
            'class': self._get_module_name(),
            'format': self.format,
            'source_path': str(self.source_file_path),
            'output_path': str(self.output_file_path),
            # 'error': 'error_t',
            # 'elapsed': 'elpased_t',
        })

    @abc.abstractmethod
    def export(self):
        # Subclasses write the converted output to ``output_file_path``.
        pass

    def _get_module_name(self):
        # Strip the package prefix and the trailing '.export' segment,
        # leaving just the extension name.
        return (self.__module__
                .replace('mfr.extensions.', '', 1)
                .replace('.export', '', 1))
class BaseProvider(metaclass=abc.ABCMeta):
    """Base class for MFR Providers. Requires ``download`` and ``metadata``
    methods. Validates that the given file url is hosted at a domain listed
    in `mfr.server.settings.ALLOWED_PROVIDER_DOMAINS`.
    """

    def __init__(self, request, url, action=None):
        self.request = request

        # Reject any url whose host is not on the provider allow-list.
        netloc = furl.furl(url).netloc
        if netloc not in settings.ALLOWED_PROVIDER_NETLOCS:
            raise exceptions.ProviderError(
                message="{} is not a permitted provider domain.".format(
                    markupsafe.escape(netloc)),
                code=400)

        self.url = url
        self.action = action

        self.provider_metrics = MetricsRecord('provider')
        self.metrics = self.provider_metrics.new_subrecord(self.NAME)
        self.provider_metrics.merge({'type': self.NAME, 'url': str(self.url)})

    @abc.abstractproperty
    def NAME(self):
        raise NotImplementedError

    @abc.abstractmethod
    def metadata(self):
        pass

    @abc.abstractmethod
    def download(self):
        pass
class BaseRenderer(metaclass=abc.ABCMeta):
    """Abstract base for MFR renderers.

    Subclasses implement :meth:`render` and declare ``file_required`` and
    ``cache_result``.
    """

    def __init__(self, metadata, file_path, url, assets_url, export_url):
        self.metadata = metadata
        self.file_path = file_path
        self.url = url
        module_name = self._get_module_name()
        self.assets_url = '{}/{}'.format(assets_url, module_name)
        self.export_url = export_url

        self.renderer_metrics = MetricsRecord('renderer')
        if module_name:
            self.metrics = self.renderer_metrics.new_subrecord(module_name)

        self.renderer_metrics.merge({
            'class': module_name,
            'ext': self.metadata.ext,
            'url': self.url,
            'export_url': self.export_url,
            'file_path': self.file_path,
            # 'error': 'error_t',
            # 'elapsed': 'elpased_t',
        })

        # unoconv gets file_required and cache_result from its subrenderer,
        # which is constructed at the end of __init__
        try:
            self.renderer_metrics.add('file_required', self.file_required)
            self.renderer_metrics.add('cache_result', self.cache_result)
        except AttributeError:
            pass

    @abc.abstractmethod
    def render(self):
        pass

    @abc.abstractproperty
    def file_required(self):
        """Does the rendering html need the raw file content to display
        correctly? Syntax-highlighted text files do. Standard image formats
        do not, since an <img> tag only needs a url to the file.
        """
        pass

    @abc.abstractproperty
    def cache_result(self):
        pass

    def _get_module_name(self):
        # e.g. 'mfr.extensions.image.render' -> 'image'
        name = self.__module__
        name = name.replace('mfr.extensions.', '', 1)
        return name.replace('.render', '', 1)
class BaseRenderer(metaclass=abc.ABCMeta):
    """Base class for renderers; subclasses provide ``render``,
    ``file_required`` and ``cache_result``.
    """

    def __init__(self, metadata, file_path, url, assets_url, export_url):
        self.metadata = metadata
        self.file_path = file_path
        self.url = url
        self.assets_url = '{}/{}'.format(assets_url, self._get_module_name())
        self.export_url = export_url
        self.renderer_metrics = MetricsRecord('renderer')
        if self._get_module_name():
            self.metrics = self.renderer_metrics.new_subrecord(
                self._get_module_name()
            )
        self.renderer_metrics.merge({
            'class': self._get_module_name(),
            'ext': self.metadata.ext,
            'url': self.url,
            'export_url': self.export_url,
            'file_path': self.file_path,
            # 'error': 'error_t',
            # 'elapsed': 'elpased_t',
        })
        # unoconv gets file_required and cache_result from its subrenderer,
        # which is constructed at the end of __init__
        try:
            self.renderer_metrics.add('file_required', self.file_required)
            self.renderer_metrics.add('cache_result', self.cache_result)
        except AttributeError:
            pass

    @abc.abstractmethod
    def render(self):
        pass

    @abc.abstractproperty
    def file_required(self):
        """Does the rendering html need the raw file content to display
        correctly? Syntax-highlighted text files do. Standard image formats
        do not, since an <img> tag only needs a url to the file.
        """
        pass

    @abc.abstractproperty
    def cache_result(self):
        pass

    def _get_module_name(self):
        # Strip the package prefix and the trailing '.render' segment.
        return (self.__module__
                .replace('mfr.extensions.', '', 1)
                .replace('.render', '', 1))
class BaseExporter(metaclass=abc.ABCMeta):
    """Abstract base for MFR exporters.

    Subclasses implement :meth:`export`, which converts the file at
    ``source_file_path`` into ``format`` and writes it to
    ``output_file_path``.
    """

    def __init__(self, ext, source_file_path, output_file_path, format, metadata):
        """Initialize the base exporter.

        :param ext: the name of the extension to be exported
        :param source_file_path: the path of the input file
        :param output_file_path: the path of the output file
        :param format: the format of the exported file (e.g. 1200*1200.jpg)
        :param metadata: metadata of the file being exported
        """
        self.ext = ext
        self.source_file_path = source_file_path
        self.output_file_path = output_file_path
        self.format = format
        self.metadata = metadata

        # Per-request metrics; a subrecord is keyed by the extension name.
        self.exporter_metrics = MetricsRecord('exporter')
        if self._get_module_name():
            self.metrics = self.exporter_metrics.new_subrecord(self._get_module_name())
        self.exporter_metrics.merge({
            'class': self._get_module_name(),
            'format': self.format,
            'source_path': str(self.source_file_path),
            'output_path': str(self.output_file_path),
            # 'error': 'error_t',
            # 'elapsed': 'elpased_t',
        })

    @abc.abstractmethod
    def export(self):
        pass

    def _get_module_name(self):
        # e.g. 'mfr.extensions.image.export' -> 'image'
        return self.__module__ \
            .replace('mfr.extensions.', '', 1) \
            .replace('.export', '', 1)
class BaseProvider(metaclass=abc.ABCMeta):
    """Base class for MFR Providers. Requires ``download`` and ``metadata``
    methods. Validates that the given file url is hosted at a domain listed
    in `mfr.server.settings.ALLOWED_PROVIDER_DOMAINS`.
    """

    def __init__(self, request, url):
        self.request = request
        url_netloc = furl.furl(url).netloc
        # Only hosts on the allow-list may be used as providers.
        if url_netloc not in settings.ALLOWED_PROVIDER_NETLOCS:
            message = "{} is not a permitted provider domain.".format(
                markupsafe.escape(url_netloc)
            )
            raise exceptions.ProviderError(message=message, code=400)
        self.url = url
        self.provider_metrics = MetricsRecord('provider')
        self.metrics = self.provider_metrics.new_subrecord(self.NAME)
        self.provider_metrics.merge({
            'type': self.NAME,
            'url': str(self.url),
        })

    @abc.abstractproperty
    def NAME(self):
        raise NotImplementedError

    @abc.abstractmethod
    def metadata(self):
        pass

    @abc.abstractmethod
    def download(self):
        pass
class BaseHandler(CorsMixin, tornado.web.RequestHandler, SentryMixin):
    """Base class for the Render and Export handlers. Fetches the file
    metadata for the file indicated by the ``url`` query parameter and builds
    the provider caches. Also handles writing output and errors.
    """

    bytes_written = 0

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.handler_metrics = MetricsRecord('handler')
        self.handler_metrics.add('cache_file.result', None)
        self.handler_metrics.add('source_file.upload.required', True)
        self.metrics = self.handler_metrics.new_subrecord(self.NAME)
        self.extension_metrics = MetricsRecord('extension')

    @abc.abstractproperty
    def NAME(self):
        raise NotImplementedError

    async def prepare(self):
        """Builds an MFR provider instance, to which it passes the ``url``
        query parameter. From that, the file metadata is extracted. Also
        builds cached waterbutler providers.
        """
        if self.request.method == 'OPTIONS':
            return

        try:
            self.url = self.request.query_arguments['url'][0].decode('utf-8')
        except KeyError:
            raise exceptions.ProviderError(
                '"url" is a required argument.',
                provider=settings.PROVIDER_NAME,
                code=400,
            )

        self.provider = utils.make_provider(settings.PROVIDER_NAME,
                                            self.request, self.url)
        self.metadata = await self.provider.metadata()
        self.extension_metrics.add('ext', self.metadata.ext)

        self.cache_provider = waterbutler.core.utils.make_provider(
            settings.CACHE_PROVIDER_NAME,
            {},  # User information which can be left blank
            settings.CACHE_PROVIDER_CREDENTIALS,
            settings.CACHE_PROVIDER_SETTINGS)

        self.local_cache_provider = waterbutler.core.utils.make_provider(
            'filesystem', {}, {}, settings.LOCAL_CACHE_PROVIDER_SETTINGS)

        self.source_file_id = uuid.uuid4()
        self.add_header('X-MFR-REQUEST-ID', str(uuid.uuid4()))

    async def write_stream(self, stream):
        """Copy ``stream`` to the response in CHUNK_SIZE pieces, tracking the
        byte count for metrics. A client disconnect is not an error.
        """
        try:
            while True:
                chunk = await stream.read(settings.CHUNK_SIZE)
                if not chunk:
                    break
                # Temp fix, write does not accept bytearrays currently
                if isinstance(chunk, bytearray):
                    chunk = bytes(chunk)
                self.bytes_written += len(chunk)
                self.write(chunk)
                del chunk
                await self.flush()
        except tornado.iostream.StreamClosedError:
            # Client has disconnected early.
            # No need for any exception to be raised
            return

    def write_error(self, status_code, exc_info):
        """Render an error response and stash structured error metrics on
        ``self.error_metrics`` for later logging.
        """
        self.captureException(exc_info)  # Log all non 2XX codes to sentry
        etype, exc, _ = exc_info

        if issubclass(etype, exceptions.PluginError):
            try:  # clever errors shouldn't break other things
                current, child_type = {}, None
                for level in reversed(exc.attr_stack):
                    if current:
                        current = {'{}_{}'.format(level[0], child_type): current}
                    current['child_type'] = child_type
                    current.update(level[1])
                    current['self_type'] = level[0]
                    child_type = level[0]
                current['materialized_type'] = '.'.join(
                    [x[0] for x in exc.attr_stack])
                self.error_metrics = current
            except Exception:
                # BUGFIX: this was `except Exception as exc: pass`.  Binding
                # the caught exception to `exc` shadowed the outer `exc` and,
                # per PEP 3110, Python deletes that name when the except block
                # exits -- so `exc.code` below raised NameError whenever
                # metrics-building failed.
                pass
            self.set_status(exc.code)
            self.finish(exc.as_html())
        else:
            self.error_metrics = {
                'code': self.get_status(),
                'message': str(exc),
                'self_type': 'error',
                'child_type': 'nonspecific',
                'materialized_type': 'error.nonspecific',
                'error_nonspecific': {
                    'self_type': 'nonspecific',
                    'class': etype.__name__,
                    'data': repr(exc),
                },
            }
            self.set_status(400)
            self.finish('''
                <link rel="stylesheet" href="/static/css/bootstrap.min.css">
                <div class="alert alert-warning" role="alert">
                Unable to render the requested file, please try again later.
                </div>
            ''')

    # avoid dumping duplicate information to application log
    def log_exception(self, typ, value, tb):
        if isinstance(value, tornado.web.HTTPError):
            if value.log_message:
                # renamed local from `format`, which shadowed the builtin
                fmt = "%d %s: " + value.log_message
                args = ([value.status_code, self._request_summary()] +
                        list(value.args))
                tornado.web.gen_log.warning(fmt, *args)
        else:
            tornado.web.app_log.error("Uncaught exception %s\n",
                                      self._request_summary(),
                                      exc_info=(typ, value, tb))

    def on_finish(self):
        """Collect final handler metrics and kick off async cache/cleanup and
        remote analytics logging.
        """
        if self.request.method not in self.ALLOWED_METHODS:
            return

        self.handler_metrics.merge({
            'type': self.NAME,
            'bytes_written': self.bytes_written,
            # 'elpased': elapsed.serialize(),
            'cache_file': {
                'id': str(getattr(self, 'cache_file_id', '')),
                'path': str(getattr(self, 'cache_file_path', '')),
            },
            'source_file': {
                'id': str(getattr(self, 'source_file_id', '')),
                'path': str(getattr(self, 'source_file_path', '')),
            }
        })

        if hasattr(self, 'cache_provider'):
            self.handler_metrics.merge(
                {'cache_file': {'provider': self.cache_provider.NAME}})
        if hasattr(self, 'local_cache_provider'):
            self.handler_metrics.merge(
                {'source_file': {'provider': self.local_cache_provider.NAME}})

        asyncio.ensure_future(self._cache_and_clean())
        asyncio.ensure_future(
            remote_logging.log_analytics(
                remote_logging._serialize_request(self.request),
                self._all_metrics(),
                is_error=hasattr(self, 'error_metrics')))

    async def _cache_and_clean(self):
        # no-op hook; subclasses override to upload to cache / clean temp files
        return

    def _all_metrics(self):
        """Serialize every metrics record attached to this handler into one
        dict for remote logging; absent records serialize to None.
        """
        metrics = {
            'handler': self.handler_metrics.serialize(),
        }
        metrics_attrs = [
            ('extension', 'extension_metrics'),
            ('file', 'metadata'),
            ('renderer', 'renderer_metrics'),
            ('exporter', 'exporter_metrics'),
        ]
        for (key, name) in metrics_attrs:
            metrics[key] = getattr(self, name).serialize() if hasattr(
                self, name) else None
        if hasattr(self, 'provider') and hasattr(self.provider,
                                                 'provider_metrics'):
            metrics['provider'] = self.provider.provider_metrics.serialize()

        # error_metrics is already serialized
        metrics['error'] = getattr(self, 'error_metrics', None)

        return metrics
class BaseHandler(CorsMixin, tornado.web.RequestHandler, SentryMixin):
    """Base class for the Render and Export handlers. Fetches the file
    metadata for the file indicated by the ``url`` query parameter and builds
    the provider caches. Also handles writing output and errors.
    """

    bytes_written = 0

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.handler_metrics = MetricsRecord('handler')
        self.handler_metrics.add('cache_file.result', None)
        self.handler_metrics.add('source_file.upload.required', True)
        self.metrics = self.handler_metrics.new_subrecord(self.NAME)
        self.extension_metrics = MetricsRecord('extension')

    @abc.abstractproperty
    def NAME(self):
        raise NotImplementedError

    async def prepare(self):
        """Builds an MFR provider instance, to which it passes the ``url``
        query parameter. From that, the file metadata is extracted. Also
        builds cached waterbutler providers.
        """
        if self.request.method == 'OPTIONS':
            return

        try:
            self.url = self.request.query_arguments['url'][0].decode('utf-8')
        except KeyError:
            raise exceptions.ProviderError(
                '"url" is a required argument.',
                provider=settings.PROVIDER_NAME,
                code=400,
            )

        self.provider = utils.make_provider(
            settings.PROVIDER_NAME,
            self.request,
            self.url
        )
        self.metadata = await self.provider.metadata()
        self.extension_metrics.add('ext', self.metadata.ext)

        self.cache_provider = waterbutler.core.utils.make_provider(
            settings.CACHE_PROVIDER_NAME,
            {},  # User information which can be left blank
            settings.CACHE_PROVIDER_CREDENTIALS,
            settings.CACHE_PROVIDER_SETTINGS
        )
        self.local_cache_provider = waterbutler.core.utils.make_provider(
            'filesystem', {}, {}, settings.LOCAL_CACHE_PROVIDER_SETTINGS
        )

        self.source_file_id = uuid.uuid4()

    async def write_stream(self, stream):
        """Copy ``stream`` to the response in CHUNK_SIZE pieces, tracking the
        byte count for metrics. A client disconnect is not an error.
        """
        try:
            while True:
                chunk = await stream.read(settings.CHUNK_SIZE)
                if not chunk:
                    break
                # Temp fix, write does not accept bytearrays currently
                if isinstance(chunk, bytearray):
                    chunk = bytes(chunk)
                self.bytes_written += len(chunk)
                self.write(chunk)
                del chunk
                await self.flush()
        except tornado.iostream.StreamClosedError:
            # Client has disconnected early.
            # No need for any exception to be raised
            return

    def write_error(self, status_code, exc_info):
        """Render an error response and stash structured error metrics on
        ``self.error_metrics`` for later logging.
        """
        self.captureException(exc_info)  # Log all non 2XX codes to sentry
        etype, exc, _ = exc_info

        if issubclass(etype, exceptions.PluginError):
            try:  # clever errors shouldn't break other things
                current, child_type = {}, None
                for level in reversed(exc.attr_stack):
                    if current:
                        current = {'{}_{}'.format(level[0], child_type): current}
                    current['child_type'] = child_type
                    current.update(level[1])
                    current['self_type'] = level[0]
                    child_type = level[0]
                current['materialized_type'] = '.'.join(
                    [x[0] for x in exc.attr_stack])
                self.error_metrics = current
            except Exception:
                # BUGFIX: this was `except Exception as exc: pass`.  Binding
                # the caught exception to `exc` shadowed the outer `exc` and,
                # per PEP 3110, Python deletes that name when the except block
                # exits -- so `exc.code` below raised NameError whenever
                # metrics-building failed.
                pass
            self.set_status(exc.code)
            self.finish(exc.as_html())
        else:
            self.error_metrics = {
                'code': self.get_status(),
                'message': str(exc),
                'self_type': 'error',
                'child_type': 'nonspecific',
                'materialized_type': 'error.nonspecific',
                'error_nonspecific': {
                    'self_type': 'nonspecific',
                    'class': etype.__name__,
                    'data': repr(exc),
                },
            }
            self.set_status(400)
            self.finish('''
                <link rel="stylesheet" href="/static/css/bootstrap.min.css">
                <div class="alert alert-warning" role="alert">
                Unable to render the requested file, please try again later.
                </div>
            ''')

    def on_finish(self):
        """Collect final handler metrics and kick off async cache/cleanup and
        remote analytics logging.
        """
        if self.request.method not in self.ALLOWED_METHODS:
            return

        self.handler_metrics.merge({
            'type': self.NAME,
            'bytes_written': self.bytes_written,
            # 'elpased': elapsed.serialize(),
            'cache_file': {
                'id': str(getattr(self, 'cache_file_id', '')),
                'path': str(getattr(self, 'cache_file_path', '')),
            },
            'source_file': {
                'id': str(getattr(self, 'source_file_id', '')),
                'path': str(getattr(self, 'source_file_path', '')),
            }
        })

        if hasattr(self, 'cache_provider'):
            self.handler_metrics.merge({
                'cache_file': {
                    'provider': self.cache_provider.NAME
                }
            })
        if hasattr(self, 'local_cache_provider'):
            self.handler_metrics.merge({
                'source_file': {
                    'provider': self.local_cache_provider.NAME
                }
            })

        asyncio.ensure_future(self._cache_and_clean())
        asyncio.ensure_future(
            remote_logging.log_analytics(
                remote_logging._serialize_request(self.request),
                self._all_metrics(),
                is_error=hasattr(self, 'error_metrics')))

    async def _cache_and_clean(self):
        # no-op hook; subclasses override to upload to cache / clean temp files
        return

    def _all_metrics(self):
        """Serialize every metrics record attached to this handler into one
        dict for remote logging; absent records serialize to None.
        """
        metrics = {
            'handler': self.handler_metrics.serialize(),
        }
        metrics_attrs = [
            ('extension', 'extension_metrics'),
            ('file', 'metadata'),
            ('renderer', 'renderer_metrics'),
            ('exporter', 'exporter_metrics'),
        ]
        for (key, name) in metrics_attrs:
            metrics[key] = getattr(self, name).serialize() if hasattr(self, name) else None
        if hasattr(self, 'provider') and hasattr(self.provider, 'provider_metrics'):
            metrics['provider'] = self.provider.provider_metrics.serialize()

        # error_metrics is already serialized
        metrics['error'] = getattr(self, 'error_metrics', None)

        return metrics