async def __call__(
        self, ctx: ActorContext,
        dst_info: T.list(T.dict(dst=T.str)),
        dst_node_info: T.list(T.dict(dst=T.str, dst_node=T.str)),
):
    """Fan out availability notifications for the given destinations.

    ``dst_info`` entries name a destination actor; the registery is asked
    which nodes serve each one. ``dst_node_info`` entries name an explicit
    (dst, dst_node) pair. Destinations served by the local node are marked
    available directly on the local queue; every remote node gets one
    batched notify message, all sent concurrently.
    """
    # group destination actors by the node that must be notified
    dst_nodes = defaultdict(set)
    dst_info = [x['dst'] for x in dst_info]
    for dst in dst_info:
        for dst_node in ctx.registery.find_dst_nodes(dst):
            dst_nodes[dst_node].add(dst)
    dst_node_info = [(x['dst'], x['dst_node']) for x in dst_node_info]
    for dst, dst_node in dst_node_info:
        dst_nodes[dst_node].add(dst)
    # local destinations: notify our own queue, no network round trip needed
    local_dst_list = dst_nodes.pop(self.app.name, None)
    if local_dst_list:
        for dst in local_dst_list:
            self.app.queue.op_notify(src_node=self.app.name, dst=dst, available=True)
    # remote nodes: one batched notify per node, dispatched concurrently
    tasks = []
    for dst_node, dst_list in dst_nodes.items():
        content = dict(dst_list=list(dst_list))
        tasks.append(self.safe_notify(ctx, content, dst_node=dst_node))
    await asyncio.gather(*tasks)
def test_repr():
    """repr() of schemas should render the canonical T-expression text."""
    dict_schema = T.dict(key=T.list(T.int).unique, ).optional.desc('a dict')
    assert repr(dict_schema) == "T.dict({key}).optional.desc('a dict')"
    list_schema = T.list(T.int.min(0)).unique
    assert repr(list_schema) == 'T.list(int).unique'
    # optional(False) should not show up in the repr at all
    str_schema = T.str.minlen(10).optional(False)
    assert repr(str_schema) == 'T.str.minlen(10)'
    # an empty Schema object has its own minimal repr
    assert repr(Schema()) == 'Schema<>'
def test_list():
    """isomorph_schema should parse list-form schema definitions."""
    parsed = isomorph_schema(['list.unique.maxlen(10).desc("a list")', 'int'])
    assert parsed == T.list(T.int).unique.maxlen(10).desc('a list')
    # a bare one-element list means "a list of that item type"
    parsed = isomorph_schema(['int'])
    assert parsed == T.list(T.int)
    # more than two elements is not a valid list schema
    with pytest.raises(SchemaError):
        isomorph_schema(['list', 'int', 'str'])
async def do_proxy_tell(
    ctx: ActorContext,
    tasks: T.list(
        T.dict(
            dst=T.str,
            content=T.dict.optional,
        )),
):
    """Forward a batch of tell requests on behalf of the caller, one by one."""
    for task in tasks:
        # priority 10: forwarded tells are sent at a fixed priority
        await ctx.tell(dst=task['dst'], content=task['content'], priority=10)
def do_update_story_images(
    ctx: ActorContext,
    feed_id: T.int,
    offset: T.int,
    story_url: T.url,
    images: T.list(T.dict(
        url=T.url,
        status=T.int,
    ))
):
    """Persist image fetch results as ImageInfo rows, then rewrite the story.

    The incoming ``images`` carry the HTTP status observed for each image
    url. Results are collapsed per url-root before saving.
    """
    # keep one (status, url) sample per url root; max() on the tuple keeps
    # the entry with the highest status code seen for that root
    url_root_status = {}
    for img in images:
        url_root = ImageInfo.extract_url_root(img['url'])
        value = (img['status'], img['url'])
        if url_root in url_root_status:
            url_root_status[url_root] = max(value, url_root_status[url_root])
        else:
            url_root_status[url_root] = value
    with transaction.atomic():
        # save image info in one bulk insert inside a single transaction
        image_info_objects = []
        for url_root, (status, url) in url_root_status.items():
            image_info_objects.append(ImageInfo(
                url_root=url_root,
                sample_url=url,
                referer=story_url,
                status_code=status,
            ))
        LOG.info(f'bulk create {len(image_info_objects)} ImageInfo objects')
        ImageInfo.objects.bulk_create(image_info_objects)
    # rewrite the story content with the refreshed image information
    _replace_story_images(feed_id, offset)
async def do_unregister(
        ctx: ActorContext,
        node_name: T.str) -> T.dict(nodes=T.list(NodeSpecSchema)):
    """Remove a node from the registery, persist it, and return all specs."""
    LOG.info(f'unregister node {node_name}')
    ctx.registery.remove(node_name)
    # the scheduler owns persistence of the registery state
    await ctx.tell('scheduler.save_registery')
    return dict(nodes=ctx.registery.to_spec())
def do_save_feed_creation_result(
        ctx: ActorContext,
        feed_creation_id: T.int,
        messages: T.list(T.str),
        feed: FeedSchema.optional,
):
    """Finalize a FeedCreation: record the error, or create/link the Feed.

    ``feed`` is the found feed payload (or None when the finder failed).
    The whole update runs in one transaction; the function is idempotent
    for already-READY creations.
    """
    with transaction.atomic():
        feed_dict = feed
        try:
            feed_creation = FeedCreation.get_by_pk(feed_creation_id)
        except FeedCreation.DoesNotExist:
            LOG.warning(f'feed creation {feed_creation_id} not exists')
            return
        if feed_creation.status == FeedStatus.READY:
            # already finalized by an earlier message; no-op
            LOG.info(f'feed creation {feed_creation_id} is ready')
            return
        feed_creation.message = '\n\n'.join(messages)
        feed_creation.dt_updated = timezone.now()
        # no feed payload means the finder failed: mark the creation as
        # error and remember the url as NOT_FOUND for future lookups
        if not feed_dict:
            feed_creation.status = FeedStatus.ERROR
            feed_creation.save()
            FeedUrlMap(source=feed_creation.url, target=FeedUrlMap.NOT_FOUND).save()
            return
        url = feed_dict['url']
        feed = Feed.get_first_by_url(url)
        if not feed:
            # first time this feed url is seen: create the Feed record
            now = timezone.now()
            feed = Feed(url=url, status=FeedStatus.READY,
                        reverse_url=reverse_url(url),
                        dt_updated=now, dt_checked=now, dt_synced=now)
            feed.save()
        feed_creation.status = FeedStatus.READY
        feed_creation.feed_id = feed.id
        feed_creation.save()
        user_feed = UserFeed.objects.filter(
            user_id=feed_creation.user_id, feed_id=feed.id).first()
        if user_feed:
            LOG.info('UserFeed#{} user_id={} feed_id={} already exists'.format(
                user_feed.id, feed_creation.user_id, feed.id))
        else:
            user_feed = UserFeed(
                user_id=feed_creation.user_id,
                feed_id=feed.id,
                is_from_bookmark=feed_creation.is_from_bookmark,
            )
            user_feed.save()
        # map the requested url (and the canonical feed url itself)
        # to the canonical feed url
        FeedUrlMap(source=feed_creation.url, target=feed.url).save()
        if feed.url != feed_creation.url:
            FeedUrlMap(source=feed.url, target=feed.url).save()
        # hand the full feed payload to the updater actor
        ctx.hope('harbor_rss.update_feed', dict(
            feed_id=feed.id,
            feed=validate_feed_output(feed_dict),
        ))
async def do_register(ctx: ActorContext, node: NodeSpecSchema) -> T.dict(nodes=T.list(NodeSpecSchema)):
    """Register (or refresh) a node in the registery and return all node specs."""
    LOG.info(f'register node:\n{pretty_format_json(node)}')
    existed = ctx.registery.get(node['name'])
    if existed and existed.to_spec() == node:
        # identical spec already registered: nothing to update or persist
        LOG.info(f'register node {node["name"]} already existed and no changes')
    else:
        ctx.registery.add(node)
        LOG.info('current registery info:\n' + pretty_format_json(ctx.registery.to_spec()))
        # persist only when the registery actually changed
        await ctx.tell('scheduler.save_registery')
    return dict(nodes=ctx.registery.to_spec())
def test_optional():
    """Optional schemas accept None (str normalizes to ''); required ones reject it."""
    # optional: None passes through unchanged for non-str types
    assert _(T.int.optional)(None) is None
    assert _(T.str.optional)(None) == ''
    assert _(T.str.optional)('') == ''
    assert _(T.list(T.int).optional)(None) is None
    assert _(T.dict(key=T.int).optional)(None) is None
    # optional does not excuse otherwise-invalid values
    with pytest.raises(Invalid):
        _(T.int.optional)('')
    with pytest.raises(Invalid):
        _(T.dict(key=T.int).optional)('')
    # without optional, None is rejected for every type
    with pytest.raises(Invalid):
        _(T.int)(None)
    with pytest.raises(Invalid):
        _(T.str)(None)
    with pytest.raises(Invalid):
        _(T.dict(key=T.int))(None)
    with pytest.raises(Invalid):
        _(T.list(T.int))(None)
class Model:
    """Schema declarations (validr T expressions) used as model fields."""
    # single-key dict with a bounded integer id
    user = T.dict(userid=T.int.min(0).max(9).desc("UserID"))
    # list of non-negative integers
    tags = T.list(T.int.min(0))
    # box/style attributes, all individually described
    style = T.dict(
        width=T.int.desc("width"),
        height=T.int.desc("height"),
        border_width=T.int.desc("border_width"),
        border_style=T.str.desc("border_style"),
        border_color=T.str.desc("border_color"),
        color=T.str.desc("color"),
    )
    # free-form optional string
    optional = T.str.optional.desc("unknown value")
def test_str_copy_and_to_primitive():
    """str(), to_primitive() and copy() of a schema all agree with EXPECT."""
    schema = T.dict(
        key=T.list(T.int.min(0).max(9)).unique.optional(False),
        tag=T.str.desc('a tag'),
    ).optional.desc('a dict').__schema__
    assert schema.to_primitive() == EXPECT
    # str(schema) is the JSON form of the primitive representation
    assert json.loads(str(schema)) == EXPECT
    copy = schema.copy()
    assert copy.to_primitive() == EXPECT
    # verify copy is deep copy: mutating the original must not leak into it
    schema.items['key'].items = T.int
    assert copy.to_primitive() == EXPECT
def test_validr(benchmark):
    """Benchmark validr compiled-schema validation against the shared `data` fixture."""
    from validr import T, Compiler
    schema = Compiler().compile(
        T.dict(
            location=T.dict(lat=T.float.min(-90).max(90), lng=T.float.min(-180).max(180)),
            name=T.str,
            alt_names=T.list(T.str),
            population=T.dict(city=T.int.min(0), metro=T.int.min(0)),
        ))
    # the pytest-benchmark fixture calls the validator repeatedly and
    # returns one representative result; validation must round-trip `data`
    assert benchmark(schema, data) == data
async def do_detect_story_images(
    ctx: ActorContext,
    feed_id: T.int,
    offset: T.int,
    story_url: T.url,
    image_urls: T.list(T.url).unique,
):
    """Probe the HTTP status of each story image and report results.

    Sends one {url, status} entry per image to
    harbor_rss.update_story_images when done.
    """
    LOG.info(
        f'detect story images story={feed_id},{offset} num_images={len(image_urls)} begin'
    )
    options = dict(
        allow_non_webpage=True,
        dns_service=DNS_SERVICE,
    )
    async with AsyncFeedReader(**options) as reader:
        async def _read(url):
            # hosts known to reject our referer are skipped outright
            if is_referer_deny_url(url):
                return url, FeedResponseStatus.REFERER_DENY.value
            # HEAD-style probe: content itself is not needed
            response = await reader.read(url, referer="https://rss.anyant.com/", ignore_content=True)
            return url, response.status
        futs = []
        for url in image_urls:
            futs.append(asyncio.ensure_future(_read(url)))
        t_begin = time.time()
        try:
            results = await asyncio.gather(*futs)
        except (TimeoutError, concurrent.futures.TimeoutError):
            # keep whatever finished before the timeout
            # NOTE(review): a done future that raised will re-raise from
            # fut.result() here — confirm _read cannot fail with other errors
            results = [fut.result() for fut in futs if fut.done()]
        cost_ms = (time.time() - t_begin) * 1000
        num_ok = num_error = 0
        images = []
        for url, status in results:
            if status == 200:
                num_ok += 1
            else:
                num_error += 1
            images.append(dict(url=url, status=status))
        LOG.info(f'detect story images story={feed_id},{offset} '
                 f'num_images={len(image_urls)} finished, '
                 f'ok={num_ok} error={num_error} cost={cost_ms:.0f}ms')
        await ctx.hope(
            'harbor_rss.update_story_images', dict(
                feed_id=feed_id,
                offset=offset,
                story_url=story_url,
                images=images,
            ))
async def __call__(
        self, ctx: ActorContext,
        dst_list: T.list(T.str),
):
    """Handle a notify message: mark each dst available from the sender node."""
    queue = self.app.queue
    src_node = ctx.message.src_node
    for dst in dst_list:
        try:
            queue.op_notify(src_node=src_node, dst=dst, available=True)
        except ActorStateError as ex:
            # best-effort: a state conflict on one dst must not block the rest
            LOG.warning(ex)
    return dict(message='OK')
def test_custom_validator():
    """A custom string validator can be registered and used through Compiler."""
    @validator(string=True)
    def choice_validator(compiler, items):
        # `items` is the schema argument text, e.g. 'A B C D'
        choices = set(items.split())

        def validate(value):
            if value in choices:
                return value
            raise Invalid('invalid choice')
        return validate
    compiler = Compiler(validators={'choice': choice_validator})
    schema = T.list(T.choice('A B C D').default('A'))
    assert T(schema) == schema  # test copy custom validator
    validate = compiler.compile(schema)
    # None falls back to the declared default 'A'
    assert validate(['A', 'B', 'C', 'D', None]) == ['A', 'B', 'C', 'D', 'A']
async def __call__(
        self, ctx: ActorContext,
        actor_name: T.str,
        upstream_list: T.list(T.str).minlen(1),
        maxsize: T.int.min(1),
):
    """Pull pending messages for ``actor_name`` from upstream nodes.

    The fetch budget is split evenly across upstreams. Fetched ack
    messages complete local outbox entries; everything else goes to the
    local inbox.
    """
    LOG.info(
        f'fetch dst={actor_name} maxsize={maxsize} from {upstream_list}')
    tasks = []
    # per-upstream budget, clamped to [1, 100]
    size = min(100, max(1, maxsize // len(upstream_list)))
    content = dict(dst=actor_name, maxsize=size)
    for src_node in upstream_list:
        if src_node == self.app.name:
            # local node: read straight from our own queue
            tasks.append(self.local_fetch(dst=actor_name, maxsize=size))
        else:
            tasks.append(self.safe_fetch(ctx, content, src_node))
    queue = self.app.queue
    # gather preserves task order, so results pair up with upstream_list
    messages_list = await asyncio.gather(*tasks)
    for src_node, messages in zip(upstream_list, messages_list):
        if len(messages) < size:
            # upstream returned less than asked: treat it as drained until
            # it notifies availability again
            queue.op_notify(src_node=src_node, dst=actor_name, available=False)
        if actor_name == ACTOR_MESSAGE_ACKER:
            # ack messages complete outbox entries instead of being enqueued
            for msg in messages:
                status = msg.content['status']
                try:
                    queue.op_acked(outbox_message_id=msg.id, status=status)
                except ActorStateError as ex:
                    LOG.warning(ex)
        else:
            for msg in messages:
                # fill in routing info from the registery before enqueueing
                msg = self.app.registery.complete_message(msg)
                try:
                    queue.op_inbox(msg)
                except ActorStateError as ex:
                    LOG.warning(ex)
import os.path
from dotenv import load_dotenv
from validr import T, modelclass, fields, Invalid
from rssant_common.validator import compiler
from actorlib.network_helper import LOCAL_NODE_NAME

# validator for the extra-networks env value: a list of {name, url} dicts
validate_extra_networks = compiler.compile(T.list(T.dict(
    name=T.str,
    url=T.url.relaxed,
)))


@modelclass(compiler=compiler)
class EnvConfig:
    """Environment-driven configuration fields (validr modelclass).

    NOTE(review): this class likely continues beyond this chunk.
    """
    debug = T.bool.default(True).desc('debug')
    log_level = T.enum('DEBUG,INFO,WARNING,ERROR').default('INFO')
    root_url = T.url.relaxed.default('http://*****:*****@url,name@url')
    secret_key = T.str.default('8k1v_4#kv4+3qu1=ulp+@@#65&++!fl1(e*7)ew&nv!)cq%e2y')
    allow_private_address = T.bool.default(False)
    check_feed_minutes = T.int.min(1).default(30)
    feed_story_retention = T.int.min(1).default(5000).desc('max storys to keep per feed')
    # actor
    actor_storage_path = T.str.default('data/actor_storage')
    actor_storage_compact_wal_delta = T.int.min(1).default(5000)
    actor_queue_max_complete_size = T.int.min(0).default(500)
# NOTE(review): this chunk is the tail of a T.dict(...) schema that opens
# above it (shared feed/entry fields) plus the start of StorySchema.
    title_detail=Detailed.optional.desc("The title of the feed/entry"),
    description=T.str.truncated.optional.desc(
        "The description of the feed/entry"),
    published=T.str.optional.desc("The date the feed/entry was published"),
    published_parsed=T.datetime.object.optional.invalid_to_default.desc(
        "The date the feed/entry was published"),
    updated=T.str.optional.desc("The date the feed/entry was updated"),
    updated_parsed=T.datetime.object.optional.invalid_to_default.desc(
        "The date the feed/entry was updated"),
    author=T.str.optional.desc("The author of this feed/entry"),
    author_detail=UserInfo.optional.desc(
        "Details about the feed/entry author"),
    tags=T.list(
        T.dict(
            term=T.str.optional.desc("The category term (keyword)"),
            scheme=T.str.optional.desc("The category scheme (domain)"),
            label=T.str.optional.desc(
                "A human-readable label for the category"),
        )).optional.desc("Details of the categories for the feed/entry"),
    license=T.str.optional.desc(
        "A URL of the license under which this entry is distributed"),
)

# schema of one story (entry); reuses the shared CommonInfo fields
StorySchema = T.dict(
    **CommonInfo,
    summary=T.str.truncated.optional.desc("A summary of the entry"),
    summary_detail=Detailed.optional.desc("A summary of the entry"),
    content=T.list(Detailed).optional.desc(
        "Details about the full content of the entry"),
    contributors=T.list(UserInfo).optional.desc(
        "Contributors (secondary authors) to this entry"),
async def do_query(ctx: ActorContext) -> T.dict(nodes=T.list(NodeSpecSchema)):
    """Return the current registery content as a list of node specs."""
    node_specs = ctx.registery.to_spec()
    return {'nodes': node_specs}
        # tab-completion is optional: bail out quietly when readline or
        # rlcompleter is unavailable (e.g. on Windows)
        import readline
    except ImportError:
        return
    try:
        import rlcompleter
    except ImportError:
        return
    readline.set_completer(rlcompleter.Completer(context).complete)
    readline.parse_and_bind("tab:complete")
    # command history: restore on start, save on interpreter exit
    if os.path.exists(HISTORY_PATH):
        readline.read_history_file(HISTORY_PATH)
    atexit.register(_save_history)


# request headers persisted between shell sessions
HeadersSchema = T.list(T.dict(name=T.str, value=T.str))
validate_headers = Compiler().compile(HeadersSchema)


class Shell:
    """Interactive shell wrapping a Client with persisted request headers."""

    def __init__(self, app):
        self.app = app
        headers = self._load_headers()
        self._client = Client(app, headers=headers)

    def _load_headers(self):
        # returns None when no saved headers file exists
        if not os.path.exists(HEADERS_PATH):
            return None
        LOG.info(f"Load headers from {HEADERS_PATH!r}")
        try:
            with open(HEADERS_PATH) as f:
# NOTE(review): this chunk starts inside a fields dict (presumably
# FeedSchemaFields) that opens above it.
    dt_updated=T.datetime.object.optional,
    encoding=T.str.optional,
    etag=T.str.optional,
    last_modified=T.str.optional,
    response_status=T.int.optional,
    checksum_data=T.bytes.maxlen(4096).optional,
    warnings=T.str.optional,
)

# output variant: dt_updated is plain T.datetime instead of T.datetime.object
FeedOutputSchemaFields = FeedSchemaFields.copy()
FeedOutputSchemaFields.update(dt_updated=T.datetime.optional, )

StorySchema = T.dict(**StorySchemaFields)

# a full feed payload carries its storys inline
FeedSchema = T.dict(
    **FeedSchemaFields,
    storys=T.list(StorySchema),
)

# subset of the feed fields describing fetch status only
FeedInfoSchemaFieldNames = [
    'response_status',
    'warnings',
]
FeedInfoSchemaFields = {
    k: FeedSchemaFields[k]
    for k in FeedInfoSchemaFieldNames
}
FeedInfoSchema = T.dict(
    **FeedInfoSchemaFields,
    status=T.str.default(FeedStatus.READY),
)
import logging
from collections import defaultdict
from threading import RLock
from validr import T
from functools import cached_property

from .actor import Actor
from .network_helper import LOCAL_NODE_NAME
from .helper import generate_message_id
from .message import ActorMessage

LOG = logging.getLogger(__name__)

# spec of one actor node: its name, served modules, and reachable networks
NodeSpecSchema = T.dict(
    name=T.str,
    modules=T.list(T.str),
    networks=T.list(
        T.dict(
            name=T.str,
            url=T.str.optional,
        )))


class NodeInfo:
    """In-memory registery record for one node.

    NOTE(review): this class likely continues beyond this chunk.
    """

    def __init__(self, name: str, modules: set, networks: list):
        self.name = name
        self.modules = modules
        self._networks = networks

    def __repr__(self):
        # NOTE(review): self.id is not assigned in __init__ — confirm it is
        # defined elsewhere (e.g. a property below), otherwise repr() raises
        return '<{} #{} {}>'.format(type(self).__name__, self.id, self.name)
def test_load_schema():
    """T() accepts a schema object, a compiled validator, and the primitive form."""
    compiler = Compiler()
    expected = T.list(T.int.min(0))
    assert T(expected) == expected
    # a compiled validator still carries its schema
    assert T(compiler.compile(expected)) == expected
    # the primitive (JSON-style) form parses back to the same schema
    assert T(['int.min(0)']) == expected
import os.path
import re
from functools import cached_property
from urllib.parse import urlparse

from dotenv import load_dotenv
from validr import T, Compiler, modelclass, fields, Invalid

from rssant_common.network_helper import LOCAL_NODE_NAME

# upper bound on feed count (presumably per user — confirm against callers)
MAX_FEED_COUNT = 5000

compiler = Compiler()

# validator for the extra-networks env value: a list of {name, url} dicts
validate_extra_networks = compiler.compile(
    T.list(T.dict(
        name=T.str,
        url=T.url,
    )))


@modelclass(compiler=compiler)
class ConfigModel:
    """Base class for validr-backed config models."""
    pass


class GitHubConfigModel(ConfigModel):
    """GitHub integration settings (domain, client id, secret)."""
    domain: str = T.str
    client_id: str = T.str
    secret: str = T.str


class EnvConfig(ConfigModel):
    """Environment-driven application configuration (continues beyond this chunk)."""
from validr import T

from . import case


# table-driven cases for T.list validators: each schema maps either to
# (input, expected) pairs or to {'valid': [...], 'invalid': [...]} buckets
@case({
    T.list(T.int): [
        ([], []),
        ([1, 2], [1, 2]),
        # any iterable is accepted and materialized as a list
        (range(3), [0, 1, 2]),
    ],
    T.list(T.int).optional: {
        'valid': [
            None,
            [],
        ],
        'invalid': [
            123,
        ]
    },
    T.list(T.int).unique: {
        'valid': [
            [1, 2, 3],
        ],
        'invalid': [
            # '2' validates to int 2, duplicating the earlier 2
            [1, 2, '2'],
        ]
    },
    T.list(T.dict(key=T.int)).unique: {
        'valid': [
            [{
                'key': 1
def test_dict_error_position():
    """Invalid nested data must fail and report the full position path.

    Bug fix: the original called validate() with invalid data but never
    expected the error, so the Invalid exception escaped and the test
    always failed. Wrap the call in pytest.raises and check the reported
    position of the bad field.
    """
    validate = compiler.compile(T.dict(key=T.list(T.dict(key=T.int))))
    # 'x' is not an int, so validation must fail at key[0].key
    with pytest.raises(Invalid) as exinfo:
        validate({'key': [{'key': 'x'}]})
    assert exinfo.value.position == 'key[0].key'
def do_update_registery(ctx, nodes: T.list(NodeSpecSchema)):
    """Apply the given node specs to the local registery and log the result.

    Fixes: the final log line concatenated a placeholder-free f-string with
    the formatted specs — use lazy %-style logging arguments instead — and
    the ``nodes`` parameter was rebound to an unrelated string.
    """
    LOG.info('update registery %s', ctx.message)
    ctx.registery.update(nodes)
    spec_text = pretty_format_json(ctx.registery.to_spec())
    LOG.info('current registery:\n%s', spec_text)
import os.path
import re

from dotenv import load_dotenv
from validr import T, modelclass, fields, Invalid

from rssant_common.validator import compiler
from actorlib.network_helper import LOCAL_NODE_NAME

# validator for the extra-networks env value: a list of {name, url} dicts
validate_extra_networks = compiler.compile(
    T.list(T.dict(
        name=T.str,
        url=T.url.relaxed,
    )))


@modelclass(compiler=compiler)
class ConfigModel:
    """Base class for validr-backed config models."""
    pass


class EnvConfig(ConfigModel):
    """Environment-driven application configuration (continues beyond this chunk)."""
    debug: bool = T.bool.default(False).desc('debug')
    profiler_enable: bool = T.bool.default(False).desc(
        'enable profiler or not')
    debug_toolbar_enable: bool = T.bool.default(False).desc(
        'enable debug toolbar or not')
    log_level: str = T.enum('DEBUG,INFO,WARNING,ERROR').default('INFO')
    root_url: str = T.url.relaxed.default('http://localhost:6789')
    scheduler_network: str = T.str.default('localhost')
    scheduler_url: str = T.url.relaxed.default(
def do_dns_service_update(ctx, records: T.dict.key(T.str).value(T.list(T.str))):
    """Apply the given records to the global DNS_SERVICE.

    ``records`` maps a string key to a list of strings — presumably
    hostname to resolved addresses; confirm against DNS_SERVICE.update.
    """
    LOG.info('dns_service_update %r', records)
    DNS_SERVICE.update(records)
def test_create_enum_validator():
    """create_enum_validator builds a validator usable through a custom Compiler."""
    enum_validator = create_enum_validator('abcd', ['A', 'B', 'C', 'D'])
    custom_compiler = Compiler(validators={'abcd': enum_validator})
    validate = custom_compiler.compile(T.list(T.abcd.default('A')))
    # None falls back to the declared default 'A'
    assert validate(['A', 'B', 'C', 'D', None]) == ['A', 'B', 'C', 'D', 'A']