class TagArticle(JSONModel):
    """Maps a tag name to a sorted set of article ids scored by time."""

    KEY = 'TagArticle:%s'

    tag = StringProperty()
    article_id = IntegerProperty()
    time = IntegerProperty()

    def _get_watching_keys(self, inserting=False):
        # A save only touches this tag's sorted set.
        return [self.KEY % self.tag]

    def _save_self(self, redis_client, inserting=False):
        zset_key = self.KEY % self.tag
        if not self.time:
            # A falsy time means "detach the article from this tag".
            redis_client.zrem(zset_key, self.article_id)
        else:
            redis_client.zadd(zset_key, self.time, self.article_id)

    @classmethod
    def get_article_ids(cls, tag_name, cursor=None,
                        limit=CONFIG.ARTICLES_PER_PAGE):
        """Return (article_id, time) pairs for a tag, newest first."""
        client = cls.redis_client
        zset_key = cls.KEY % tag_name
        if cursor is not None:
            # Page strictly below the cursor timestamp (exclusive bound).
            return client.zrevrangebyscore(zset_key, '(%d' % cursor, 0,
                                           0, limit, withscores=True,
                                           score_cast_func=int)
        return client.zrevrange(zset_key, 0, limit - 1,
                                withscores=True, score_cast_func=int)

    @classmethod
    def get_articles(cls, category_name, cursor=None,
                     limit=CONFIG.ARTICLES_PER_PAGE):
        """Return (articles, next_cursor) for one page of a tag listing."""
        ids_with_time = cls.get_article_ids(category_name, cursor)
        if not ids_with_time:
            return [], None
        from .article import Article
        return Article.get_articles_and_next_cursor(ids_with_time,
                                                    limit=limit)
class Model4(Model1):
    # Test model layering one of every basic property type on Model1.
    a = StringProperty()
    b = IntegerProperty()
    c = BooleanProperty()
    d = FloatProperty()
    e = DateTimeProperty()
    f = DateTimeProperty(auto_now=True)  # refreshed automatically on save
class CategoryArticle(JSONModel):
    """Membership of an article in a category, as a time-scored sorted set."""

    KEY = 'CategoryArticle:%s'

    category = StringProperty()
    article_id = IntegerProperty()
    time = IntegerProperty()

    def _get_watching_keys(self, inserting=False):
        # Only this category's sorted set is modified by a save.
        return [self.KEY % self.category]

    def _save_self(self, redis_client, inserting=False):
        zset_key = self.KEY % self.category
        if not self.time:
            # A falsy time removes the article from the category.
            redis_client.zrem(zset_key, self.article_id)
        else:
            redis_client.zadd(zset_key, self.time, self.article_id)
class JSONTestModel(JSONModel):
    """Test double that records which save hooks ran as hash fields."""

    a = IntegerProperty()
    b = StringProperty()
    c = BooleanProperty()
    d = ListProperty()
    e = Property()

    def _save_self(self, redis_client, inserting=False):
        # Flag that the main-entity save hook executed.
        redis_client.hset(self.KEY, 'self', '1')

    def _save_relative(self, redis_client, inserting=False):
        # Flag that the relative-data save hook executed.
        redis_client.hset(self.KEY, 'relative', '1')
class ArticleComments(JSONModel):
    """Per-article ordered list (Redis list) of comment ids."""

    KEY = 'ArticleComments:%d'

    article_id = IntegerProperty()
    comment_ids = ListProperty(int)

    def _get_watching_keys(self, inserting=False):
        return [self.KEY % self.article_id]

    def _save_self(self, redis_client, inserting=False):
        # Rewrite the whole list: delete, then re-push every id in order.
        key = self.KEY % self.article_id
        redis_client.delete(key)
        for comment_id in self.comment_ids:
            redis_client.rpush(key, comment_id)

    @classmethod
    def get_by_article_id(cls, article_id, order, page,
                          page_size=CONFIG.COMMENTS_PER_PAGE):
        """Return one page of comment ids.

        order truthy: oldest first, paged from the head of the list.
        order falsy: newest first — pages are sliced from the tail with
        negative indexes, then the slice is reversed.
        """
        key = cls.KEY % article_id
        if page < 1:
            page = 1
        if order:
            start_index = (page - 1) * page_size
            end_index = start_index + page_size - 1
        else:
            # Tail-relative slice: page 1 is [-page_size, -1].
            end_index = -(page - 1) * page_size - 1
            start_index = end_index - page_size + 1
        comment_ids = cls.redis_client.lrange(key, start_index, end_index)
        if comment_ids and not order:
            comment_ids.reverse()
        return comment_ids

    @classmethod
    def append_comment_to_article(cls, redis_client, comment_id, article_id):
        # Fast path used when inserting a single new comment.
        redis_client.rpush(cls.KEY % article_id, comment_id)

    @classmethod
    def get_comment_count_of_article(cls, article_id):
        return cls.redis_client.llen(cls.KEY % article_id)

    @classmethod
    def get_comment_count_of_articles(cls, article_ids):
        """Return {article_id: comment_count} using one pipelined trip."""
        with cls.redis_client.pipeline(transaction=False) as pipe:
            for article_id in article_ids:
                pipe.llen(cls.KEY % article_id)
            counts = pipe.execute()
        count_zip = zip(article_ids, [int(count) for count in counts])
        return dict(count_zip)
class KeywordArticle(JSONModel):
    """Redis set of '<keywords>:<article_id>' members used for
    substring keyword search over articles."""

    keywords = StringProperty()
    article_id = IntegerProperty()

    def _get_watching_keys(self, inserting=False):
        return [self.KEY]

    def _save_self(self, redis_client, inserting=False):
        member = '%s:%d' % (self.keywords, self.article_id)
        redis_client.sadd(self.KEY, member)

    def delete(self, redis_client):
        # Remove this article's keyword entry from the search set.
        member = '%s:%d' % (self.keywords, self.article_id)
        redis_client.srem(self.KEY, member)

    @classmethod
    def query_by_keyword(cls, keyword, result_limit=CONFIG.SEARCH_PAGE_SIZE,
                         search_limit=CONFIG.MAX_SEARCH_COUNT):
        """Return up to result_limit article ids whose keyword string
        contains *keyword*; results are cached as a comma-joined string
        (empty string caches "no results")."""
        cache_key = 'KeywordArticles:' + keyword
        cached_result = redis_cache_client.get(cache_key)
        if cached_result is not None:
            if not cached_result:
                # Cached empty string: a previously-cached empty result.
                return []
            try:
                article_ids = cached_result.split(',')
                return [int(article_id) for article_id in article_ids]
            except ValueError:
                # Corrupt cache entry: log it, drop it, fall through
                # to a fresh scan.
                logging.warning('Key "%s" contains wrong value: %s',
                                cache_key, cached_result)
                redis_cache_client.delete(cache_key)
        pattern = '*%s*:*' % keyword.lower()
        # NOTE(review): only the first SSCAN page is examined and the
        # returned cursor is discarded, so matches beyond one page may be
        # missed — confirm this best-effort behavior is intended.
        cursor, members = cls.redis_client.sscan(cls.KEY, match=pattern,
                                                 count=search_limit)
        if members:
            article_ids = [
                member.rsplit(':', 1)[-1] for member in members[:result_limit]
            ]
            result = [int(article_id) for article_id in article_ids]
        else:
            article_ids = result = []
        redis_cache_client.set(cache_key, ','.join(article_ids),
                               ex=CONFIG.DEFAULT_CACHE_TIME)
        return result
class UserEmail(JSONModel):
    """Email -> user id index, kept in a single Redis hash."""

    email = StringProperty()
    id = IntegerProperty()

    @classmethod
    def get_user_id_by_email(cls, email):
        """Return the user id bound to *email*, or None when unknown."""
        raw_id = cls.redis_client.hget(cls.KEY, email)
        return int(raw_id) if raw_id else None

    @classmethod
    def get_user_ids_by_emails(cls, emails):
        # Raw hash values aligned with the input order; missing -> None.
        return cls.redis_client.hmget(cls.KEY, emails)

    def _save_self(self, redis_client, inserting=False):
        if not self.id:
            # A falsy id unregisters the email.
            redis_client.hdel(self.KEY, self.email)
        else:
            redis_client.hset(self.KEY, self.email, self.id)
class ArticleURL(JSONModel):
    """URL -> article id index stored in a single Redis hash."""

    url = StringProperty()
    article_id = IntegerProperty()

    @classmethod
    def get_article_id_by_url(cls, url):
        """Return the article id bound to *url*, or None."""
        article_id = cls.redis_client.hget(cls.KEY, url)
        if article_id:
            return int(article_id)

    @classmethod
    def get_by_url(cls, url):
        """Return an ArticleURL entity for *url*, or None when unbound."""
        article_id = cls.get_article_id_by_url(url)
        if article_id:
            # FIX: this model's property is `article_id`, not `id` —
            # the original passed `id=article_id`, leaving the entity's
            # article_id unset.
            return cls(url=url, article_id=article_id)

    @classmethod
    def search_by_date(cls, date, limit=CONFIG.SEARCH_PAGE_SIZE):
        """Return URLs matching '<date>*' from one HSCAN page."""
        # NOTE(review): a single HSCAN page may miss matches and the
        # cursor is discarded — confirm best-effort results are intended.
        cursor, result = cls.redis_client.hscan(cls.KEY, 0, date + '*', limit)
        return result

    def _save_self(self, redis_client, inserting=False):
        if self.url:
            if self.article_id:
                redis_client.hset(self.KEY, self.url, self.article_id)
            else:
                # A falsy article_id releases the URL binding.
                redis_client.hdel(self.KEY, self.url)

    def _check_inserting(self):
        # Reject inserts that would steal a URL already bound to a
        # different article.
        article_id = self.get_article_id_by_url(self.url)
        if article_id:
            article_id = int(article_id)
            if article_id != self.article_id:
                raise IntegrityError(
                    'article url "%s" has been used by article %d'
                    % (self.url, article_id))
class IDModel(JSONModel):
    """Entity with a 1-based auto-increment id.

    Entities live in a single Redis list where list index == id - 1 and
    each element is the JSON-serialized entity body.
    """

    id = IntegerProperty()

    @classmethod
    def count(cls):
        # Total number of entities ever inserted (list length).
        return cls.redis_client.llen(cls.KEY)

    @classmethod
    def get_by_id(cls, entity_id):
        """Return the entity for *entity_id*, or None when out of range."""
        entity_id = int(entity_id)
        # Python 2: int() of a huge value yields a long, which can never
        # be a valid index here, so it is rejected along with ids <= 0.
        if entity_id <= 0 or isinstance(entity_id, long):
            return
        json_content = cls.redis_client.lindex(cls.KEY, entity_id - 1)
        if json_content:
            return cls.from_json(json_content)

    @classmethod
    def get_by_ids(cls, ids, filter_empty=False):
        """Batch-load entities by id.

        With filter_empty, missing slots and falsy entities are dropped;
        otherwise the result aligns 1:1 with *ids* (None for misses).
        """
        if not ids:
            return []
        results = cls._get_data_by_ids(ids)
        if filter_empty:
            entities = [cls.from_json(json_content)
                        for json_content in results if json_content]
            return [entity for entity in entities if entity]
        else:
            return [cls.from_json(json_content) for json_content in results]

    @classmethod
    def _get_data_by_ids(cls, ids):
        # Pipeline the LINDEX calls: one round trip for all ids.
        key = cls.KEY
        pipe = cls.redis_client.pipeline(transaction=False)
        for entity_id in ids:
            # todo: check id > 0
            pipe.lindex(key, int(entity_id) - 1)
        return pipe.execute()

    def save(self, redis_client=None, inserting=False, relative=True,
             transactional=True):
        super(IDModel, self).save(redis_client, inserting, relative,
                                  transactional)

    def _check_inserting(self):
        # Inserts must not carry a preassigned id; one is allocated in
        # _populate_required_attributes during the save transaction.
        if self.id is not None:
            raise PropertyError('cannot insert a %s object with id'
                                % self.__class__.__name__)

    def _populate_required_attributes(self, pipeline):
        # Allocate the next id from the current list length (1-based).
        if self.id is None:
            self.id = pipeline.llen(self.KEY) + 1

    def _save_self(self, pipeline, inserting=False):
        if inserting:
            # New entity: id - 1 equals the index RPUSH will append at.
            pipeline.rpush(self.KEY, self.to_json())
        else:
            if self.id is None:
                raise PropertyError('cannot save a %s object without id'
                                    % self.__class__.__name__)
            pipeline.lset(self.KEY, self.id - 1, self.to_json())

    def _fail_on_save(self, exception, pipeline, inserting=False):
        # Roll back the speculatively allocated id so a retry can
        # re-allocate cleanly.
        if inserting:
            self.id = None
class PropertiedTestModel2(PropertiedModel):
    # Minimal test model: one integer property on PropertiedModel.
    a = IntegerProperty()
class PublicTestModel(PublicModel):
    # Minimal test model: one integer property on PublicModel.
    a = IntegerProperty()
class IDTestModel(IDModel):
    # Test model exercising the basic property types on IDModel.
    a = IntegerProperty()
    b = StringProperty()
    c = BooleanProperty()
    d = ListProperty()
class HashTestModel(HashModel):
    # Test model exercising the basic property types on HashModel.
    a = IntegerProperty()
    b = StringProperty()
    c = BooleanProperty()
    d = ListProperty()
class Comment(PublicModel):
    """A user comment on an article, with helpers for HTML rendering
    and the latest-comments sidebar/feed listing."""

    article_id = IntegerProperty()
    user_id = IntegerProperty()
    content = StringProperty()
    format = IntegerProperty()
    ua = ListProperty()
    time = DateTimeProperty(auto_now=True)

    # FIX: raw string. The original '<.*?>|\&.*?\;' relied on the invalid
    # string escapes '\&' and '\;' surviving as literal backslashes
    # (DeprecationWarning, slated to become an error in future Pythons).
    # The compiled pattern — strip tags and HTML entities — is unchanged,
    # since regex treats '\&' and '\;' as the literals '&' and ';'.
    HTML_PATTERN = re.compile(r'<.*?>|&.*?;', re.UNICODE)
    ROOT_LINK_PATTERN = re.compile(r'<a href="/([^"]*)">')
    ANCHOR_LINK_PATTERN = re.compile(r'<a href="#([^"]*)">')
    REPLY_LINK_PATTERN = re.compile(r'<a href="[^"]*#comment-id-(\d+)">')

    def html_content(self):
        """Render the raw content according to its format."""
        return format_content(self.content, self.format)

    def html_content_with_full_url(self, article_url):
        # for email and ATOM: absolutize root-relative and anchor links.
        content = self.html_content()
        content = self.ROOT_LINK_PATTERN.sub(
            r'<a href="%s/\1">' % CONFIG.MAJOR_HOST_URL, content)
        content = self.ANCHOR_LINK_PATTERN.sub(
            r'<a href="%s#\1">' % article_url, content)
        return content

    def striped_html_content(self, length=CONFIG.LATEST_COMMENTS_LENGTH):
        """Plain-text excerpt of the rendered content, tags/entities
        replaced by spaces, truncated to *length*."""
        result = self.HTML_PATTERN.sub(' ', self.html_content())
        return result[:length].strip()

    @classmethod
    def get_comments_of_article(cls, article_id, order, page,
                                page_size=CONFIG.COMMENTS_PER_PAGE,
                                public_only=True):
        """Return (comments, has_next_page) for one page of an article."""
        comment_ids = ArticleComments.get_by_article_id(
            article_id, order, page, page_size)
        if comment_ids:
            # A full page implies there may be another one.
            has_next_page = len(comment_ids) == page_size
            return Comment.get_by_ids(comment_ids, filter_empty=True,
                                      public_only=public_only), has_next_page
        return [], False

    @classmethod
    def get_latest_comments(cls, limit=CONFIG.LATEST_COMMENTS_FOR_SIDEBAR):
        """Return (comments, article_dict, user_dict) for the sidebar,
        newest first; ([], {}, {}) when any needed piece is missing."""
        comments_json = cls.redis_client.lrange(cls.KEY, -limit, -1)
        if comments_json:
            comments = []
            # Tail of the list is newest; reversed() yields newest first.
            for comment_json in reversed(comments_json):
                comment = Comment.from_json(comment_json)
                if comment.public:
                    comments.append(comment)
            if comments:
                article_ids = set()
                user_ids = set()
                for comment in comments:
                    article_ids.add(comment.article_id)
                    user_ids.add(comment.user_id)
                articles = Article.get_by_ids(article_ids, public_only=True)
                if articles:
                    article_dict = {
                        article.id: article for article in articles
                    }
                    users = User.get_by_ids(user_ids, filter_empty=True)
                    if users:
                        user_dict = {user.id: user for user in users}
                        return comments, article_dict, user_dict
        return [], {}, {}

    def _get_relative_keys(self, inserting=False):
        # Inserting appends to the article's comment-id list; updates
        # touch no relative keys.
        if inserting:
            return [ArticleComments.KEY % self.article_id]
        return []

    def _save_relative(self, redis_client, inserting=False):
        if inserting:
            ArticleComments.append_comment_to_article(redis_client, self.id,
                                                      self.article_id)
class ArticleTime(JSONModel):
    """Index of article ids scored by a timestamp in one sorted set.

    Subclasses supply KEY (e.g. publish-time and update-time indexes).
    """

    article_id = IntegerProperty()
    time = IntegerProperty()

    @classmethod
    def get_article_ids_for_page(cls, page, page_size=CONFIG.ARTICLES_PER_PAGE):
        """Return article ids for *page*, newest first; page_size <= 0
        returns the whole index."""
        if page_size <= 0:
            return cls.redis_client.zrevrangebyscore(cls.KEY, '+inf', 0)
        page = max(page, 1)
        offset = (page - 1) * page_size
        return cls.redis_client.zrevrangebyscore(cls.KEY, '+inf', 0,
                                                 start=offset, num=page_size)

    @classmethod
    def get_article_ids(cls, cursor=None, with_time=True,
                        limit=CONFIG.ARTICLES_PER_PAGE):
        """Return ids (optionally (id, time) pairs), newest first,
        starting strictly below *cursor* when given."""
        if cursor is not None:
            return cls.redis_client.zrevrangebyscore(
                cls.KEY, '(%d' % cursor, 0, start=0, num=limit,
                withscores=with_time, score_cast_func=int)
        return cls.redis_client.zrevrange(
            cls.KEY, 0, limit - 1,
            withscores=with_time, score_cast_func=int)

    @classmethod
    def get_previous_article_id(cls, publish_time):
        """Id of the newest article strictly older than *publish_time*."""
        found = cls.redis_client.zrevrangebyscore(
            cls.KEY, '(%d' % publish_time, 0, start=0, num=1)
        return int(found[0]) if found else None

    @classmethod
    def get_next_article_id(cls, publish_time):
        """Id of the oldest article strictly newer than *publish_time*."""
        found = cls.redis_client.zrangebyscore(
            cls.KEY, '(%d' % publish_time, '+inf', start=0, num=1)
        return int(found[0]) if found else None

    @classmethod
    def get_article_ids_by_data(cls, date):
        """Article ids whose timestamp falls within a day of *date*."""
        day_start = parse_date_for_url(date)
        if not day_start:
            return []
        day_start = day_start.replace(tzinfo=LOCAL_TIMEZONE)
        base_time = datetime_to_timestamp(day_start)
        # Widen the window one day on each side to absorb timezone skew.
        low = base_time - SECONDS_IN_A_DAY
        high = base_time + SECONDS_IN_A_DAY
        found_ids = cls.redis_client.zrangebyscore(cls.KEY, low, high)
        return [int(one_id) for one_id in found_ids]

    @classmethod
    def get_count(cls):
        # zcard may return None on a missing key depending on the client.
        return cls.redis_client.zcard(cls.KEY) or 0

    def _save_self(self, redis_client, inserting=False):
        if not self.article_id:
            return
        if self.time:
            redis_client.zadd(self.KEY, self.time, self.article_id)
        else:
            # A falsy time drops the article from the index.
            redis_client.zrem(self.KEY, self.article_id)
class Article(PublicModel):
    """A blog article.

    Saving keeps several derived indexes consistent: URL binding
    (ArticleURL), per-category and per-tag sorted sets, the keyword
    search set, and the public/private publish-time and update-time
    sorted sets.
    """

    title = StringProperty()
    url = StringProperty()
    content = StringProperty()
    format = IntegerProperty()
    category = StringProperty()
    tags = ListProperty()
    keywords = StringProperty()
    public = BooleanProperty()
    pub_time = DateTimeProperty(auto_now=True)
    mod_time = DateTimeProperty(auto_now=True)

    def quoted_url(self):
        return quoted_string(self.url)

    def category_name(self):
        # Leaf name of the category path, or None when uncategorized.
        if self.category:
            return Category.get_parent_path_and_name(self.category)[1]

    def html_summary(self):
        """Render everything before the first summary delimiter
        (whole content when no delimiter is present)."""
        content = self.content
        if CONFIG.SUMMARY_DELIMETER.search(content):
            summary = CONFIG.SUMMARY_DELIMETER.split(content, 1)[0]
        elif CONFIG.SUMMARY_DELIMETER2.search(content):
            summary = CONFIG.SUMMARY_DELIMETER2.split(content, 1)[0]
        else:
            summary = content
        return format_content(summary, self.format)

    def html_content(self):
        """Render the full content.

        Delimiter 1 is removed (summary stays in the body); delimiter 2
        keeps only what follows it.
        """
        content = self.content
        if CONFIG.SUMMARY_DELIMETER.search(content):
            content = CONFIG.SUMMARY_DELIMETER.sub('', content, 1)
        elif CONFIG.SUMMARY_DELIMETER2.search(content):
            content = CONFIG.SUMMARY_DELIMETER2.split(content, 1)[1]
        return format_content(content, self.format)

    def _get_relative_keys(self, inserting=False):
        """Keys that _save_relative may touch, for transactional WATCH.

        Must stay in sync with _save_relative below.
        """
        relative_keys = [
            PublicArticlePublishTime.KEY, ArticleUpdateTime.KEY,
            PrivateArticlePublishTime.KEY
        ]
        if inserting:
            relative_keys.append(ArticleURL.KEY)
            if self.category:
                relative_keys.extend(
                    [Category.KEY, CategoryArticle.KEY % self.category])
            if self.tags:
                relative_keys.extend(
                    [TagArticle.KEY % tag for tag in self.tags])
            if self.keywords:
                relative_keys.append(KeywordArticle.KEY)
        else:
            # Updating: diff against the previously persisted values to
            # find which indexes can change.
            origin_data = self._origin_data
            old_url = origin_data.get('url')
            if old_url and old_url != self.url:
                relative_keys.append(ArticleURL.KEY)
            old_category = origin_data.get('category') or ''
            if old_category != self.category:
                relative_keys.append(Category.KEY)
                if old_category:
                    relative_keys.append(CategoryArticle.KEY % old_category)
                if self.category:
                    relative_keys.append(CategoryArticle.KEY % self.category)
            old_tags = origin_data.get('tags') or []
            if old_tags != self.tags:
                # Union: both added and removed tags are affected.
                relative_keys.extend([
                    TagArticle.KEY % tag
                    for tag in set(self.tags + old_tags)
                ])
            old_keywords = origin_data.get('keywords')
            if old_keywords != self.keywords:
                relative_keys.append(KeywordArticle.KEY)
        return relative_keys

    def _save_relative(self, redis_client, inserting=False):
        """Update every derived index to match the article's new state."""
        if inserting:
            ArticleURL(url=self.url,
                       article_id=self.id).save(redis_client, inserting=True)
            if self.category:
                CategoryArticle(category=self.category, article_id=self.id,
                                time=self.pub_time).save(redis_client,
                                                         inserting=True)
            if self.tags:
                for tag_name in self.tags:
                    TagArticle(tag=tag_name, article_id=self.id,
                               time=self.pub_time).save(redis_client,
                                                        inserting=True)
            if self.keywords:
                KeywordArticle(keywords=self.keywords,
                               article_id=self.id).save(redis_client,
                                                        inserting=True)
        else:
            origin_data = self._origin_data
            old_url = origin_data.get('url')
            if old_url and old_url != self.url:
                # Bind the new URL and release the old one.
                ArticleURL(url=self.url,
                           article_id=self.id).save(redis_client,
                                                    inserting=True)
                ArticleURL(url=old_url, article_id=None).save(redis_client)
            old_category = origin_data.get('category')
            if old_category != self.category:
                # Move between category sets and drop listing caches.
                cache_keys = []
                if self.category:
                    CategoryArticle(category=self.category,
                                    article_id=self.id,
                                    time=self.pub_time).save(redis_client,
                                                             inserting=True)
                    cache_keys.append(CategoryArticles.KEY % self.category)
                if old_category:
                    CategoryArticle(category=old_category,
                                    article_id=self.id,
                                    time=None).save(redis_client)
                    cache_keys.append(CategoryArticles.KEY % old_category)
                if cache_keys:
                    redis_client.delete(*cache_keys)
            old_tags = origin_data.get('tags')
            if old_tags != self.tags:
                # Set difference gives tags to add and tags to remove.
                old_tag_set = set(old_tags)
                tag_set = set(self.tags)
                added_tag_set = tag_set - old_tag_set
                removed_tag_set = old_tag_set - tag_set
                for tag_name in added_tag_set:
                    TagArticle(tag=tag_name, article_id=self.id,
                               time=self.pub_time).save(redis_client,
                                                        inserting=True)
                for tag_name in removed_tag_set:
                    TagArticle(tag=tag_name, article_id=self.id,
                               time=None).save(redis_client)
            old_keywords = origin_data.get('keywords')
            if old_keywords != self.keywords:
                if self.keywords:
                    KeywordArticle(keywords=self.keywords,
                                   article_id=self.id).save(redis_client,
                                                            inserting=True)
                if old_keywords:
                    KeywordArticle(keywords=old_keywords,
                                   article_id=self.id).delete(redis_client)
        # Keep the article in exactly one of the public/private
        # publish-time indexes (time=None removes it from an index).
        if self.public:
            PublicArticlePublishTime(article_id=self.id,
                                     time=self.pub_time).save(redis_client)
            ArticleUpdateTime(article_id=self.id,
                              time=self.mod_time).save(redis_client)
            PrivateArticlePublishTime(article_id=self.id,
                                      time=None).save(redis_client)
        else:
            PublicArticlePublishTime(article_id=self.id,
                                     time=None).save(redis_client)
            ArticleUpdateTime(article_id=self.id,
                              time=None).save(redis_client)
            PrivateArticlePublishTime(article_id=self.id,
                                      time=self.pub_time).save(redis_client)

    @classmethod
    def exist_url(cls, url):
        return ArticleURL.get_article_id_by_url(url) is not None

    @classmethod
    def get_by_url(cls, url):
        article_id = ArticleURL.get_article_id_by_url(url)
        if article_id:
            return cls.get_by_id(article_id)

    @classmethod
    def search(cls, date, url):
        """Return the quoted URL of the public article from *date*
        closest to *url* (fuzzy recovery for broken links), or None."""
        article_ids = PublicArticlePublishTime.get_article_ids_by_data(date)
        if article_ids:
            articles = Article.get_by_ids(article_ids, public_only=True)
            if articles:
                if len(articles) == 1:
                    return articles[0].quoted_url()
                urls = [article.quoted_url() for article in articles]
                # cutoff=0: always pick the closest candidate.
                matched_urls = get_close_matches(url, urls, 1, 0)
                return matched_urls[0]

    @classmethod
    def get_articles_and_next_cursor(cls, article_ids_with_time,
                                     public_only=True,
                                     limit=CONFIG.ARTICLES_PER_PAGE):
        """Turn (id, time) pairs into (articles, next_cursor).

        A full page yields the last timestamp as the next cursor;
        a short page means there is no next page.
        """
        article_ids = [
            int(article_id)
            for article_id, timestamp in article_ids_with_time
        ]
        if len(article_ids) == limit:
            next_cursor = article_ids_with_time[-1][1]
        else:
            next_cursor = None
        articles = Article.get_by_ids(article_ids, filter_empty=True,
                                      public_only=public_only)
        return articles, next_cursor

    @classmethod
    def get_articles_for_homepage(cls, cursor=None,
                                  limit=CONFIG.ARTICLES_PER_PAGE):
        """Return (articles, next_cursor) of public articles by
        publish time."""
        article_ids_with_time = PublicArticlePublishTime.get_article_ids(
            cursor, limit=limit)
        if article_ids_with_time:
            return cls.get_articles_and_next_cursor(article_ids_with_time,
                                                    limit=limit)
        return [], None

    @classmethod
    def get_unpublished_articles(cls, page,
                                 page_size=CONFIG.ARTICLES_PER_PAGE):
        article_ids = PrivateArticlePublishTime.get_article_ids_for_page(
            page, page_size)
        if article_ids:
            return cls.get_by_ids(article_ids, filter_empty=True)
        return []

    @classmethod
    def get_articles_count(cls, public=True):
        time_class = PublicArticlePublishTime if public else PrivateArticlePublishTime
        return time_class.get_count()

    @classmethod
    def get_articles_for_feed(cls, limit=CONFIG.ARTICLES_FOR_FEED):
        # Feed order is configurable: update time vs publish time.
        if CONFIG.SORT_FEED_BY_UPDATE_TIME:
            article_ids = ArticleUpdateTime.get_article_ids_for_page(1, limit)
        else:
            article_ids = PublicArticlePublishTime.get_article_ids(
                None, with_time=False, limit=limit)
        if article_ids:
            return cls.get_by_ids(article_ids, public_only=True)
        return []

    def get_previous_article(self):
        # Navigate within the matching (public/private) time index.
        time_class = PublicArticlePublishTime if self.public else PrivateArticlePublishTime
        article_id = time_class.get_previous_article_id(self.pub_time)
        if article_id:
            return Article.get_by_id(article_id)

    def get_next_article(self):
        time_class = PublicArticlePublishTime if self.public else PrivateArticlePublishTime
        article_id = time_class.get_next_article_id(self.pub_time)
        if article_id:
            return Article.get_by_id(article_id)

    def get_nearby_articles(self):
        """Return (previous_article, next_article); either may be None."""
        time_class = PublicArticlePublishTime if self.public else PrivateArticlePublishTime
        previous_article_id = time_class.get_previous_article_id(self.pub_time)
        next_article_id = time_class.get_next_article_id(self.pub_time)
        if previous_article_id:
            if next_article_id:
                return Article.get_by_ids(
                    (previous_article_id, next_article_id))
            else:
                return Article.get_by_id(previous_article_id), None
        else:
            if next_article_id:
                return None, Article.get_by_id(next_article_id)
            else:
                return None, None