Пример #1
0
 def fetch(self, url: str) -> str:
     """Return the HTML for ``url``, preferring the storage over the network.

     A stored copy is returned as-is. Otherwise the page is obtained from
     ``self.browser`` when one is configured, or downloaded with ``urllib``
     using browser-like headers and a fresh cookie jar. The downloaded HTML
     is written to the storage before being returned.
     """
     html = self._storage.get(url)
     if html is not None:
         return html
     if self.browser is not None:
         html = self.browser.get(url)
     else:
         req = urllib.request.Request(
             url,
             None,
             {
                 "User-Agent":
                 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
                 "Accept":
                 "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
                 "Accept-Charset": "utf-8;q=0.7,*;q=0.3",
                 "Accept-Language": "en-US,en;q=0.8",
                 "Connection": "keep-alive",
             },
         )
         cj = CookieJar()
         opener = urllib.request.build_opener(
             urllib.request.HTTPCookieProcessor(cj))
         # Close the response (and its socket) deterministically instead of
         # leaving it to garbage collection.
         with opener.open(req) as response:
             content = response.read()
         charset = cchardet.detect(content)
         # Detection may yield no encoding; fall back to UTF-8 in that case.
         html = content.decode(charset["encoding"] or "utf-8")
     logger.info(Fore.GREEN, "Sent", f"{url} {len(html)}")
     self._storage[url] = html
     logger.info(Fore.BLUE, "Storage", f"Set<{url}>")
     return html
Пример #2
0
    def register(self, item):
        """Register an item class and compile a matcher for each of its routes.

        A repeated registration is logged and terminates the process with a
        non-zero status. For every ``alias -> route`` pair in
        ``item.Meta.route`` the alias is turned into an anchored regular
        expression in which each ``:name`` placeholder becomes a named group.
        A browser is created lazily the first time an item asks for
        ``with_ajax``.
        """
        if item in self.item_classes:
            logger.error('Register',
                         'Repeat register item <%s>' % (item.__name__))
            # The interactive ``exit()`` helper is provided by ``site`` and
            # would report success; signal the failure explicitly instead.
            raise SystemExit(1)
        self.item_classes.append(item)
        item.__base_url__ = item.__base_url__ or self.base_url

        # Raw strings keep the regex escapes intact without relying on
        # Python's deprecated handling of unknown string escapes.
        param_group = r'(?P<{}>[A-Za-z0-9_?&/=\s\-\u4e00-\u9fa5]+)'
        for define_alias, define_route in OrderedDict(item.Meta.route).items():
            # Only '?' is escaped; the rest of the alias is used as regex text.
            alias = '^' + define_alias.replace('?', r'\?') + '$'
            _alias_re = re.compile(
                re.sub(
                    r':(?P<params>[a-z_]+)',
                    lambda m: param_group.format(m.group('params')), alias))
            self.alias_re.append((define_alias, _alias_re))
            self.items[define_alias].append({
                'item':
                item,
                'alias_re':
                _alias_re,
                'alias':
                define_alias,
                'route':
                item.__base_url__ + define_route
            })

        logger.info(Fore.GREEN, 'Register', '<%s>' % (item.__name__))
        item_with_ajax = getattr(item.Meta, 'web', {}).get('with_ajax', False)
        if self.browser is None and item_with_ajax:
            self.browser = self.get_browser(settings=self.settings,
                                            item_with_ajax=item_with_ajax)
Пример #3
0
 def serve(self, ip='127.0.0.1', port=5000, **options):
     """Run ``self.server`` on ``ip:port``, forwarding ``options`` to it.

     Any exception raised while serving is logged and the process is
     terminated with a non-zero exit status.
     """
     try:
         logger.info(Fore.WHITE, 'Serving', 'http://%s:%s' % (ip, port))
         self.server.run(ip, port, **options)
     except Exception as e:
         logger.error('Serving', '%s' % str(e))
         # ``exit()`` is an interactive helper injected by ``site`` and would
         # report success; raise SystemExit with a failure status instead.
         raise SystemExit(1) from e
Пример #4
0
            def wrapper(error=None, ttl=ttl, *args, **kwargs):
                """Serve the wrapped call from cache when possible.

                When ``error`` is truthy the cache key is the request path,
                plus the query string if there is one; otherwise no key is
                derived. Cache failures are logged and never abort the call.
                The result is always returned through ``jsonify``.
                """
                # TODO: no key is derived when ``error`` is falsy.
                cache_key = None
                if error:
                    from flask import request
                    parsed = urlparse(request.url)
                    if parsed.query == '':
                        cache_key = request.path
                    else:
                        cache_key = '{}?{}'.format(parsed.path, parsed.query)

                # Fall back to the instance-wide ttl when none was supplied.
                ttl = ttl or self.ttl

                try:
                    hit = self.exists(cache_key)
                    if hit:
                        logger.info(Fore.YELLOW, 'Cache', 'Get<%s>' % cache_key)
                        return jsonify(self.get(cache_key, **kwargs))
                except Exception:
                    logger.exception('Cache', 'Get<%s>' % cache_key)

                result = func(error, url=cache_key, *args, **kwargs)

                # Nothing to store for an empty result or a missing key.
                if not (result and cache_key):
                    return jsonify(result)

                try:
                    stored = self.set(cache_key, result, ttl=ttl, **kwargs)
                    if stored:
                        logger.info(Fore.YELLOW, 'Cache', 'Set<%s>' % cache_key)
                except Exception:
                    logger.exception('Cache', 'Set<%s>' % cache_key)

                return jsonify(result)
Пример #5
0
 def set_cache(self, key, value):
     """Store ``value`` under ``key`` unless the key is already cached.

     Returns True only when the cache accepted the new value; in that case
     the write is logged and the ``_status_cache_set`` status is updated.
     """
     already_cached = self.cache.get(key) is not None
     # Short-circuit: the cache is not written when the key already exists.
     if already_cached or not self.cache.set(key, value):
         return False
     logger.info(Fore.YELLOW, 'Cache', 'Set<%s>' % key)
     self.update_status('_status_cache_set')
     return True
Пример #6
0
 def fetch_page_source(self, url, item, params=None, **kwargs):
     """Fetch the html of given url.

     Uses ``self.browser`` when the item's ``Meta.web`` enables ``with_ajax``
     and a browser exists; otherwise performs a ``requests.get`` with the
     item's (or the instance's) ``request_config``. The fetched text is
     handed to ``set_storage`` and returned. ``kwargs`` is accepted but not
     used by this implementation.
     """
     self.update_status('_status_sent')
     web_config = getattr(item.Meta, 'web', {})
     if web_config.get('with_ajax', False) and self.browser is not None:
         self.browser.get(url)
         text = self.browser.page_source
         if text != '':
             logger.info(Fore.GREEN, 'Sent', '%s %s 200' % (url, len(text)))
         else:
             logger.error('Sent', '%s %s' % (url, len(text)))
         result = text
     else:
         request_config = web_config.get(
             'request_config', {}) or self.web.get('request_config', {})
         response = requests.get(url,
                                 params=params,
                                 timeout=15,
                                 **request_config)
         content = response.content
         charset = cchardet.detect(content)
         # Detection can yield no encoding (e.g. for an empty body); decoding
         # with ``None`` would raise TypeError, so fall back to UTF-8.
         text = content.decode(charset['encoding'] or 'utf-8')
         if response.status_code != 200:
             logger.error(
                 'Sent',
                 '%s %s %s' % (url, len(text), response.status_code))
         else:
             logger.info(
                 Fore.GREEN, 'Sent',
                 '%s %s %s' % (url, len(text), response.status_code))
         result = text
     self.set_storage(url, result)
     return result
Пример #7
0
        def page_not_found(error):
            """Answer an unmatched route from the cache or by parsing the path.

            Returns the JSON payload with status 200, ``'Not Found'`` with
            404 when parsing yields nothing, or the error text with 500 when
            an exception is raised.
            """
            start_time = time()
            path = request.full_path
            if path.endswith('?'):
                path = path[:-1]
            try:
                res = api.get_cache(path)
                if res is None:
                    res = api.parse(path)
                    if res is None:
                        # Check before caching so a missing result is never
                        # handed to the cache.
                        logger.error('Received', '%s 404' % request.url)
                        return 'Not Found', 404
                    api.set_cache(path, res)
                api.update_status('_status_received')
                end_time = time()
                time_usage = end_time - start_time
                logger.info(
                    Fore.GREEN, 'Received', '%s %s 200 %.2fms' %
                    (request.url, len(res), time_usage * 1000))

                return app.response_class(response=res,
                                          status=200,
                                          mimetype='application/json')
            except Exception as e:
                # Record the failure instead of swallowing it silently.
                logger.error('Received', '%s 500 %s' % (request.url, e))
                return str(e), 500
Пример #8
0
 def run(self, host="127.0.0.1", port=5000, **options):
     """Run ``self.app`` on ``host:port``, forwarding ``options`` to it.

     Any exception raised while serving is logged together with its
     traceback and the process is terminated with a non-zero exit status.
     """
     try:
         logger.info(Fore.GREEN, "Serving", f"http://{host}:{port}")
         self.app.run(host, port, **options)
     except Exception as e:
         logger.error("Serving", "%s" % str(e))
         logger.error("Serving", "%s" % str(traceback.format_exc()))
         # ``exit()`` is an interactive helper injected by ``site`` and would
         # report success; raise SystemExit with a failure status instead.
         raise SystemExit(1) from e
Пример #9
0
 def register(self, item):
     """Add ``item`` to the registered item classes.

     The item's ``__base_url__`` defaults to this instance's ``base_url``
     when it is falsy. A browser is created the first time an item enables
     ``with_ajax`` in its ``Meta.web`` settings.
     """
     resolved_base = item.__base_url__ or self.base_url
     item.__base_url__ = resolved_base

     label = '<%s:%s>' % (item.Meta.route, item.__name__)
     logger.info(Fore.WHITE, 'Register', label)
     self.item_classes.append(item)

     web_config = getattr(item.Meta, 'web', {})
     item_with_ajax = web_config.get('with_ajax', False)
     if self.browser is None and item_with_ajax:
         self.browser = self.get_browser(
             settings=self.settings,
             item_with_ajax=item_with_ajax,
         )
Пример #10
0
 def run(self, host='127.0.0.1', port=5000, **options):
     """Run ``self.app`` on ``host:port``, forwarding ``options`` to it.

     Any exception raised while serving is logged together with its
     traceback and the process is terminated with a non-zero exit status.
     """
     try:
         logger.info(Fore.GREEN, 'Serving', f'http://{host}:{port}')
         self.app.run(host, port, **options)
     except Exception as e:
         logger.error('Serving', '%s' % str(e))
         logger.error('Serving', '%s' % str(traceback.format_exc()))
         # ``exit()`` is an interactive helper injected by ``site`` and would
         # report success; raise SystemExit with a failure status instead.
         raise SystemExit(1) from e
Пример #11
0
 def _parse_item(self, html, item):
     """Parse ``html`` with ``item`` and return ``{item.name: parsed}``.

     The number of parsed entries is logged under the 'Parsed' category.
     """
     parsed = item.parse(html)
     summary = 'Item<%s[%s]>' % (item.name.title(), len(parsed))
     logger.info(Fore.CYAN, 'Parsed', summary)
     return {item.name: parsed}
Пример #12
0
 def get_cache(self, key, default=None):
     """Return the cached value for ``key``, or ``default`` on a miss.

     A hit is logged and recorded through the ``_status_cache_get`` status.
     """
     cached = self.cache.get(key)
     if cached is None:
         return default
     logger.info(Fore.YELLOW, 'Cache', 'Get<%s>' % key)
     self.update_status('_status_cache_get')
     return cached
Пример #13
0
 def get_storage(self, key, default=None):
     """Return the stored value for ``key``, or ``default`` on a miss.

     A hit is logged and recorded through the ``_status_storage_get`` status.
     """
     stored = self.storage.get(key)
     if stored is None:
         return default
     logger.info(Fore.BLUE, 'Storage', 'Get<%s>' % key)
     self.update_status('_status_storage_get')
     return stored
Пример #14
0
 def parse_item(self, html, item):
     """Parse ``html`` with ``item`` and return ``{item.__name__: parsed}``.

     An empty parse result is logged as an error, a non-empty one as info.
     """
     parsed = item.parse(html)
     name = item.__name__
     summary = 'Item<%s[%s]>' % (name.title(), len(parsed))
     if len(parsed) == 0:
         logger.error('Parsed', summary)
     else:
         logger.info(Fore.CYAN, 'Parsed', summary)
     return {name: parsed}
Пример #15
0
        def fn(item):
            """Record ``item`` with the enclosing route formats and return it."""
            route = [source_format, target_format, item]
            self._routes.append(route)

            description = f"<{item.__name__}: {source_format} {target_format}>"
            logger.info(Fore.GREEN, "Register", description)

            # Returning the item lets this be used as a decorator.
            return item
Пример #16
0
 def _fetch_page_source(self, url, params=None, **kwargs):
     """Return the page text for ``url``.

     With ``with_ajax`` enabled the page is loaded through ``self._browser``;
     otherwise it is requested with ``requests.get``, forwarding ``params``
     and ``kwargs``. The url and text length are logged.
     """
     if not self.with_ajax:
         text = requests.get(url, params=params, **kwargs).text
     else:
         self._browser.get(url)
         text = self._browser.page_source
     message = '%s %s' % (url, len(text))
     logger.info(Fore.GREEN, 'Sent', message)
     return text
Пример #17
0
    def set_storage(self, key, value):
        """Save ``value`` under ``key`` unless the key is already stored.

        Returns True only when the storage accepted the value. Any exception
        raised by the storage is logged and reported as False.
        """
        try:
            if self.storage.get(key) is not None:
                return False
            if not self.storage.save(key, value):
                return False
            logger.info(Fore.BLUE, 'Storage', 'Set<%s>' % key)
            self.update_status('_status_storage_set')
            return True
        except Exception as e:
            logger.error('Storage', 'Set<{}>'.format(str(e)))
            return False
Пример #18
0
 def page_not_found(error):
     """Answer an unmatched route by parsing its path (plus query string).

     Returns the JSON response on success, or the error text with status
     500 when parsing or serialisation raises.
     """
     parse_result = urlparse(request.url)
     if parse_result.query != '':
         url = '{}?{}'.format(parse_result.path, parse_result.query)
     else:
         url = request.path
     try:
         res = jsonify(self.parse(url))
         logger.info(Fore.GREEN, 'Received',
                     '%s %s' % (request.url, len(res.response[0])))
         return res
     except Exception as e:
         # A bare string would be sent with status 200; log the failure and
         # report it as a server error instead.
         logger.error('Received', '%s 500 %s' % (request.url, e))
         return str(e), 500
Пример #19
0
 def handler(path):
     """Parse the requested path and return the result as JSON.

     On failure the exception and its traceback are logged and a generic
     JSON error is returned with status 500.
     """
     try:
         start_time = time()
         # ``full_path`` is "<path>?<query>" and ends with a bare "?" when
         # there is no query. Choosing between the two attributes avoids
         # stripping a "?" that genuinely belongs to the path or query.
         if request.query_string:
             full_path = request.full_path
         else:
             full_path = request.path
         results = self.parse_url(full_path)
         end_time = time()
         time_usage = end_time - start_time
         res = jsonify(results)
         # ``res.response`` is the iterable of body chunks, so its length is
         # not the payload size; measure the serialised body instead.
         logger.info(
             Fore.GREEN, 'Received', '%s %s 200 %.2fms' %
             (request.url, len(res.get_data()), time_usage * 1000))
         return res
     except Exception as e:
         logger.error('Serving', f'{e}')
         logger.error('Serving', '%s' % str(traceback.format_exc()))
         return jsonify({'msg': 'System Error', 'code': -1}), 500
Пример #20
0
def new(output_dir):
    """Create a new Toapi project."""
    # Imported locally so this function does not depend on file-level imports.
    import shutil
    import subprocess

    if os.path.exists(output_dir):
        logger.error('New project', 'Directory already exists.')
        return

    logger.info(Fore.GREEN, 'New project',
                'Creating project directory "%s"' % output_dir)
    try:
        # Argument list (no shell) so the directory name is never interpreted
        # by a shell; "--" stops git from reading it as an option.
        subprocess.run(
            ['git', 'clone', '--',
             'https://github.com/toapi/toapi-template', output_dir],
            check=True)
    except (OSError, subprocess.CalledProcessError) as e:
        # Do not report success (or touch the directory) if the clone failed.
        logger.error('New project', 'Failed to clone template: %s' % e)
        return

    # Drop the template's git history; portable replacement for ``rm -rf``.
    shutil.rmtree(os.path.join(output_dir, '.git'), ignore_errors=True)
    logger.info(Fore.GREEN, 'New project', 'Success!')
    click.echo('')
    click.echo('     cd %s' % output_dir)
    click.echo('     toapi run')
    click.echo('')
Пример #21
0
 def handler(path):
     """Parse the requested path and return the result as JSON.

     On failure the exception and its traceback are logged and a generic
     JSON error is returned with status 500.
     """
     try:
         start_time = time()
         # ``full_path`` is "<path>?<query>" and ends with a bare "?" when
         # there is no query. Choosing between the two attributes avoids
         # stripping a "?" that genuinely belongs to the path or query.
         if request.query_string:
             full_path = request.full_path
         else:
             full_path = request.path
         results = self.parse_url(full_path)
         end_time = time()
         time_usage = end_time - start_time
         res = jsonify(results)
         # ``res.response`` is the iterable of body chunks, so its length is
         # not the payload size; measure the serialised body instead.
         logger.info(
             Fore.GREEN,
             "Received",
             "%s %s 200 %.2fms"
             % (request.url, len(res.get_data()), time_usage * 1000),
         )
         return res
     except Exception as e:
         logger.error("Serving", f"{e}")
         logger.error("Serving", "%s" % str(traceback.format_exc()))
         return jsonify({"msg": "System Error", "code": -1}), 500
Пример #22
0
        def wrapper(*args, **kwargs):
            """Return the cached value for the call, computing it on a miss.

            The key is the enclosing ``key`` or, when that is falsy, the
            ``dynamic_key`` keyword (which is then removed from ``kwargs``).
            Cache failures are logged and never prevent the wrapped function
            from running.
            """
            # ``dynamic_key`` is only popped when the static key is falsy.
            cache_key = key or kwargs.pop('dynamic_key', None)
            if isinstance(cache_config, dict):
                kwargs.update(cache_config)
            backend = cache_class(serializer=serializer, **kwargs)

            try:
                if backend.exists(cache_key):
                    logger.info(Fore.YELLOW, 'Cache', 'Get<%s>' % cache_key)
                    return backend.get(cache_key, **kwargs)
            except Exception:
                logger.exception('Cache', 'Get<%s>' % cache_key)

            value = func(*args, **kwargs)

            # Nothing to store for an empty result or a missing key.
            if not (value and cache_key):
                return value

            try:
                stored = backend.set(cache_key, value, ttl=ttl, **kwargs)
                if stored:
                    logger.info(Fore.YELLOW, 'Cache',
                                'Set<%s>' % cache_key)
            except Exception:
                logger.exception('Cache', 'Set<%s>' % cache_key)

            return value
Пример #23
0
    def parse_url(self, full_path: str) -> dict:
        """Return parsed items for ``full_path``, keyed by item class name.

        A cached result is returned directly. Otherwise every registered
        route whose format converts ``full_path`` is fetched and parsed, and
        the combined result is cached (even when no route matched).
        """
        cached = self._cache.get(full_path)
        if cached is not None:
            logger.info(Fore.YELLOW, "Cache", f"Get<{full_path}>")
            return cached

        results = {}
        for source_format, target_format, item in self._routes:
            parsed_path = self.convert_string(
                full_path, source_format, target_format
            )
            if parsed_path is None:
                # This route does not apply to the requested path.
                continue
            full_url = self.absolute_url(item._site, parsed_path)
            html = self.fetch(full_url)
            result = item.parse(html)
            logger.info(
                Fore.CYAN,
                "Parsed",
                f"Item<{item.__name__}[{len(result)}]>",
            )
            results[item.__name__] = result

        self._cache[full_path] = results
        logger.info(Fore.YELLOW, "Cache", f"Set<{full_path}>")
        return results
Пример #24
0
        def page_not_found(error):
            """Answer an unmatched route from the cache or by parsing the path.

            Returns the JSON response on success, ``'Not Found'`` with 404
            when nothing could be parsed, or the error text with 500 when an
            exception is raised.
            """
            start_time = time()
            path = request.full_path
            if path.endswith('?'):
                path = path[:-1]
            try:
                result = api.get_cache(path) or api.parse(path)
                if result is None:
                    logger.error('Received', '%s 404' % request.url)
                    return 'Not Found', 404
                api.set_cache(path, result)
                res = jsonify(result)
                api.update_status('_status_received')
                end_time = time()
                time_usage = end_time - start_time
                # ``res.response`` is the iterable of body chunks, so its
                # length is not the payload size; measure the body instead.
                logger.info(
                    Fore.GREEN, 'Received', '%s %s 200 %.2fms' %
                    (request.url, len(res.get_data()), time_usage * 1000))

                return res
            except Exception as e:
                # A bare string would be sent with status 200; log the
                # failure and report it as a server error instead.
                logger.error('Received', '%s 500 %s' % (request.url, e))
                return str(e), 500
Пример #25
0
    def serve(self, ip='0.0.0.0', port='5000', debug=None, **options):
        """Serve parsed results as a JSON API powered by Flask.

        No explicit routes are registered: every request falls through to
        the 404 handler, which parses the requested path (plus query string)
        and returns the result as JSON.

        NOTE(review): ``debug`` is accepted but not forwarded; the app is
        always started with ``debug=False``. Confirm whether that is
        intentional before wiring it through.
        """
        from flask import Flask, jsonify, request
        app = Flask(__name__)
        app.logger.setLevel(logging.ERROR)

        @app.errorhandler(404)
        def page_not_found(error):
            parse_result = urlparse(request.url)
            if parse_result.query != '':
                url = '{}?{}'.format(parse_result.path, parse_result.query)
            else:
                url = request.path
            try:
                res = jsonify(self.parse(url))
                logger.info(Fore.GREEN, 'Received',
                            '%s %s' % (request.url, len(res.response[0])))
                return res
            except Exception as e:
                # A bare string would be sent with status 200; log the
                # failure and report it as a server error instead.
                logger.error('Received', '%s 500 %s' % (request.url, e))
                return str(e), 500

        logger.info(Fore.WHITE, 'Serving', 'http://%s:%s' % (ip, port))
        app.run(ip, port, debug=False, **options)
Пример #26
0
def new(dir_or_project):
    """Create a new Toapi project.

    Giving a dir means start a default template,

    Example: toapi new api

    Giving a github project means start a github template.

    Example: toapi new toapi/toapi-one
    """
    # A "/" marks a GitHub "owner/repo" template; the project directory is
    # then named after the last path segment. Otherwise the argument is the
    # directory name and the default template is used.
    if '/' in dir_or_project:
        project = dir_or_project
        dir_name = dir_or_project.split('/')[-1]
    else:
        project = 'toapi/toapi-template'
        dir_name = dir_or_project

    _create_project('https://github.com/%s' % project, dir_name)


def _create_project(repo_url, dir_name):
    """Clone ``repo_url`` into ``dir_name`` and remove its git history."""
    # Imported locally so this helper does not depend on file-level imports.
    import shutil
    import subprocess

    if not dir_name:
        # e.g. "owner/repo/" leaves an empty last segment.
        logger.error('New project', 'Invalid project name.')
        return

    # Checked for both kinds of template: without this, a failed clone into
    # an existing directory would be followed by deleting that directory's
    # own ``.git``.
    if os.path.exists(dir_name):
        logger.error('New project', 'Directory already exists.')
        return

    logger.info(Fore.GREEN, 'New project',
                'Creating project directory "%s"' % dir_name)
    try:
        # Argument list (no shell) so user input is never interpreted by a
        # shell; "--" stops git from reading the arguments as options.
        subprocess.run(['git', 'clone', '--', repo_url, dir_name], check=True)
    except (OSError, subprocess.CalledProcessError) as e:
        logger.error('New project', 'Failed to clone template: %s' % e)
        return

    # Drop the template's git history; portable replacement for ``rm -rf``.
    shutil.rmtree(os.path.join(dir_name, '.git'), ignore_errors=True)
    logger.info(Fore.GREEN, 'New project', 'Success!')
    click.echo('')
    click.echo('     cd %s' % dir_name)
    click.echo('     toapi run')
    click.echo('')
Пример #27
0
    def fetch(self, url: str) -> str:
        """Return the HTML for ``url``, preferring the storage over the network.

        A stored copy is logged and returned as-is. Otherwise the page is
        downloaded with ``requests``, decoded using the detected charset
        (UTF-8 when detection yields nothing), stored, and returned.
        """
        html = self._storage.get(url)
        if html is not None:
            logger.info(Fore.BLUE, 'Storage', f'Get<{url}>')
            return html

        # Without a timeout an unresponsive server would block this call
        # indefinitely.
        r = requests.get(url, timeout=15)
        content = r.content
        charset = cchardet.detect(content)
        html = content.decode(charset['encoding'] or 'utf-8')
        logger.info(Fore.GREEN, 'Sent', f'{url} {len(html)} {r.status_code}')
        self._storage[url] = html
        logger.info(Fore.BLUE, 'Storage', f'Set<{url}>')
        return html
Пример #28
0
 def fetch(self, url: str) -> str:
     """Return the HTML for ``url``, preferring the storage over the network.

     A stored copy is logged and returned as-is. Otherwise the page comes
     from ``self.browser`` when one is configured, or is downloaded with
     ``requests`` and decoded using the detected charset (UTF-8 when
     detection yields nothing). The result is stored before being returned.
     """
     html = self._storage.get(url)
     if html is not None:
         logger.info(Fore.BLUE, "Storage", f"Get<{url}>")
         return html
     if self.browser is not None:
         html = self.browser.get(url)
     else:
         # Without a timeout an unresponsive server would block this call
         # indefinitely.
         r = requests.get(url, timeout=15)
         content = r.content
         charset = cchardet.detect(content)
         html = content.decode(charset["encoding"] or "utf-8")
     logger.info(Fore.GREEN, "Sent", f"{url} {len(html)}")
     self._storage[url] = html
     logger.info(Fore.BLUE, "Storage", f"Set<{url}>")
     return html
Пример #29
0
 def stop(self, signal, frame):
     """Log that the server stopped and terminate the process.

     NOTE(review): the ``(signal, frame)`` parameters suggest this is
     installed as a signal handler - confirm against the caller.
     """
     logger.info(Fore.WHITE, 'Server', 'Server Stopped')
     # ``exit()`` is an interactive helper injected by ``site`` and is not
     # guaranteed to exist; raise SystemExit directly (status 0, as before).
     raise SystemExit(0)
Пример #30
0
 def serve(self, ip='127.0.0.1', port=5000, **options):
     """Run ``self.app`` on ``ip:port`` with debug disabled.

     Extra ``options`` are forwarded to ``self.app.run``. A keyboard
     interrupt ends the process through ``sys.exit()``.
     """
     try:
         address = 'http://%s:%s' % (ip, port)
         logger.info(Fore.WHITE, 'Serving', address)
         self.app.run(ip, port, debug=False, **options)
     except KeyboardInterrupt:
         sys.exit()