def add_postcast_feed(feed_obj):
    """
    Register a podcast-type RSS source as a ``Site`` record.

    :param feed_obj: parsed feed; this function reads ``feed_obj.url`` and the
        dict-like ``feed_obj.feed`` (presumably a feedparser result -- confirm
        against the caller)
    :return: ``{"site": <pk>}`` when an active site with the same hashed name
        already exists or a new one was saved; ``None`` when the feed has no
        title or the new record could not be saved
    """
    url = feed_obj.url
    feed = feed_obj.feed

    # A feed without a title is treated as a parse failure.
    if not feed.get('title'):
        logger.warning(f"播客解析失败:`{url}")
        return None

    name = get_hash_name(url)
    site = Site.objects.filter(name=name, status='active')
    if site:
        logger.info(f"源已经存在:`{url}")
        return {"site": site[0].pk}

    cname = feed.title[:50]

    # Prefer the base URL of the feed content, then the declared link, and
    # finally fall back to the feed URL itself.
    try:
        link = feed.content_detail.base
    except AttributeError:
        link = feed.link[:1024] if feed.get('link') else url

    if feed.get('subtitle'):
        brief = trim_brief(get_html_text(feed.subtitle))[:200]
    else:
        brief = feed.title

    try:
        author = feed.author_detail.name
    except AttributeError:
        author = feed.get('author') or get_short_host_name(link)

    # Use the feed's own image when it exposes a usable href; otherwise
    # generate a default avatar. getattr() guards against an image entry
    # without an href, which previously raised an uncaught AttributeError.
    image_href = getattr(feed.get('image'), 'href', None)
    if image_href:
        favicon = save_avatar(image_href, name)
    else:
        favicon = generate_rss_avatar(link)

    try:
        site = Site(name=name, cname=cname, link=link, brief=brief, star=10,
                    copyright=30, creator='podcast', rss=url, favicon=favicon,
                    author=author)
        site.save()
    except Exception:
        # Best effort: report the failure (with traceback) and signal it to
        # the caller via None instead of propagating.
        logger.error(f'新增播客异常:`{url}', exc_info=True)
        return None

    return {"site": site.pk}
def save_feed_to_db(name, cname, link, avatar, brief, url):
    """
    Create a ``Site`` record for a feed unless one with the same name exists.

    :param name: unique site name used for the existence lookup
    :param cname: display name stored on the new site
    :param link: site link stored on the new site
    :param avatar: avatar URL; when falsy a random emoji is used as favicon
    :param brief: short description stored on the new site
    :param url: feed URL, stored as ``rss`` and passed as referer when the
        avatar is saved
    :return: ``{"site": <pk>}`` for the existing or newly saved site;
        ``None`` when saving the new site failed
    """
    existing = Site.objects.filter(name=name)
    if existing:
        logger.info(f"源已经存在:`{url}")
        return {"site": existing[0].pk}

    # New site: store the supplied avatar, or fall back to a random emoji.
    if avatar:
        favicon = save_avatar(avatar, name, referer=url)
    else:
        favicon = get_random_emoji()

    try:
        site = Site(name=name, cname=cname, link=link, brief=brief, star=12,
                    creator='wemp', copyright=20, rss=url, favicon=favicon)
        site.save()
    except Exception:
        # Best effort: log the failure with its traceback and return None.
        logger.warning(f'新增公众号失败:`{name}', exc_info=True)
        return None

    return {"site": site.pk}
def parse_wemp_ershicimi(url, update=False):
    """
    Parse a WeChat official-account page on www.ershicimi.com and register it
    as a ``Site``.

    :param url: home page URL of the official account
    :param update: when true, crawl up to 10 article links of the page right
        after a new site has been created
    :return: ``{"site": <pk>}`` when the site already exists or was created;
        ``None`` on any failure (request failed, page could not be parsed,
        required fields missing, or the record could not be saved)
    """
    rsp = get_with_proxy(url)
    if rsp is None or not rsp.ok:
        return None

    response = HtmlResponse(url=url, body=rsp.text, encoding='utf8')

    def first_text(query):
        # extract_first() yields None when nothing matches; normalise to ''
        # so a missing node is handled as "field absent" instead of crashing
        # on None.strip().
        return (response.selector.xpath(query).extract_first() or '').strip()

    qrcode = response.selector.xpath(
        "//img[@class='qr-code']/@src").extract_first()
    if not qrcode:
        logger.warning(f'二维码链接解析失败:`{url}')
        return None

    # The account id is the ``username`` query parameter of the QR-code URL.
    # parse_qs() omits blank values, so look the key up defensively.
    query_args = urllib.parse.parse_qs(urllib.parse.urlparse(qrcode).query)
    name = query_args.get('username', [''])[0]
    if not name:
        logger.warning(f'微信公众号 id 解析失败:`{qrcode}')
        return None

    existing = Site.objects.filter(name=name)
    if existing:
        # NOTE(review): an existing site returns here, so ``update`` only
        # takes effect for newly created sites -- confirm this is intended.
        logger.info(f"源已经存在:`{url}")
        return {"site": existing[0].pk}

    # New site: all three scraped fields are required.
    cname = first_text("//li[@class='title']//span[@class='name']/text()")
    avatar = first_text("//img[@class='avatar']/@src")
    brief = first_text("//div[@class='Profile-sideColumnItemValue']/text()")
    if not (cname and avatar and brief):
        logger.warning(f'新增公众号失败:`{name}')
        return None

    # Only store the avatar once the page data is known to be usable.
    favicon = save_avatar(avatar, name)

    try:
        site = Site(name=name, cname=cname, link=qrcode, brief=brief, star=9,
                    creator='wemp', copyright=20, rss=url, favicon=favicon)
        site.save()
    except Exception:
        # Without a saved site there is nothing to return; previously the
        # code fell through and failed on ``QuerySet.pk``.
        logger.warning(f'新增公众号失败:`{name}', exc_info=True)
        return None

    # Optionally crawl the article links listed on the page.
    if update:
        try:
            links = response.selector.xpath(
                "//*[@class='weui_media_title']/a/@href").extract()[:10]
            for link in links:
                wemp_spider(urllib.parse.urljoin(url, link), site)
            set_updated_site(site.pk, ttl=12 * 3600)
        except Exception:
            # Crawling is best effort; the site itself was created.
            logger.warning(f'更新公众号内容出现异常:`{name}', exc_info=True)

    return {"site": site.pk}
def _github_login_response(request):
    """
    Run the GitHub OAuth code exchange and persist the user.

    :param request: callback request; only ``request.GET['code']`` is read
    :return: the success redirect response, or ``None`` when any step of the
        exchange did not yield the expected data
    """
    # NOTE(review): only ``code`` is read here; no OAuth ``state`` value is
    # verified in this callback -- confirm whether CSRF protection for the
    # login flow is handled elsewhere.
    code = request.GET.get('code')
    if not code:
        return None

    # Step 1: exchange the temporary code for an access token.
    rsp = requests.post('https://github.com/login/oauth/access_token', data={
        "client_id": settings.GITHUB_OAUTH_KEY,
        "client_secret": settings.GITHUB_OAUTH_SECRET,
        "code": code,
    }, headers={"Accept": "application/json"}, timeout=10)
    if not rsp.ok:
        return None

    access_token = rsp.json().get('access_token')
    if not access_token:
        return None

    # Step 2: fetch the user's profile with the token.
    rsp = requests.get('https://api.github.com/user', headers={
        "Accept": "application/json",
        "Authorization": f"token {access_token}",
    }, timeout=10)
    if not rsp.ok:
        return None

    # Decode the profile once instead of re-parsing the body per field.
    profile = rsp.json()
    if not profile.get('id'):
        return None

    oauth_id = f'github/{profile["id"]}'
    oauth_avatar = profile.get('avatar_url')

    # Persist the user information.
    user, created = User.objects.update_or_create(
        oauth_id=oauth_id,
        defaults={
            "oauth_name": profile.get('name') or profile.get('login'),
            "oauth_avatar": oauth_avatar,
            "oauth_email": profile.get('email'),
            "oauth_blog": profile.get('blog') or profile.get('html_url'),
            "oauth_ext": json.dumps(profile),
        })

    if created:
        logger.warning(f"欢迎新用户登录:`{user.oauth_name}")
        add_user_sub_feeds(
            oauth_id, get_visitor_subscribe_feeds('', '', star=28))
        add_register_count()

    # Keep a local copy of the avatar; per the original author, remote
    # images may fail to load on networks in mainland China.
    user.avatar = save_avatar(oauth_avatar, oauth_id)
    user.save()

    response = redirect('index')
    response.set_signed_cookie('oauth_id', oauth_id,
                               max_age=10 * 365 * 86400)
    response.set_signed_cookie('toast', 'LOGIN_SUCC_MSG', max_age=20)
    return response


def github_callback(request):
    """
    Handle the GitHub OAuth login callback.

    On success the user is created or updated, a signed ``oauth_id`` cookie
    is set and the client is redirected to ``index`` with a success toast.
    On any failure (missing code, rejected exchange, network error or an
    unexpected exception) the client is redirected to ``index`` with an
    error toast.

    :param request: the callback request carrying the ``code`` parameter
    :return: a redirect response to ``index``
    """
    try:
        response = _github_login_response(request)
        if response is not None:
            return response
    except (ConnectTimeout, HTTPError, ReadTimeout, Timeout, ConnectionError):
        logger.warning("OAuth 认证网络出现异常!")
    except Exception:
        # Unknown failure: keep the traceback so it can be diagnosed.
        logger.error("OAuth 认证出现未知异常", exc_info=True)

    response = redirect('index')
    response.set_signed_cookie('toast', 'LOGIN_ERROR_MSG', max_age=20)
    return response
def parse_wemp_ershicimi(url):
    """
    Parse a WeChat official-account page on www.ershicimi.com.

    When the account is not yet known, a ``Site`` record is created from the
    scraped page; when it already exists, up to 3 article links from the page
    are handed to ``wemp_spider``.

    :param url: home page URL of the official account
    :return: ``{"name": <account id>}`` once the account id was parsed from
        the page; ``None`` when the request failed or the page could not be
        parsed
    """
    try:
        rsp = requests.get(url, timeout=10)
    except Exception:
        logger.warning(f'请求出现异常:`{url}', exc_info=True)
        return None

    if not rsp.ok:
        return None

    response = HtmlResponse(url=url, body=rsp.text, encoding='utf8')

    def first_text(query):
        # extract_first() yields None when nothing matches; normalise to ''
        # so a missing node is handled as "field absent" instead of crashing
        # on None.strip().
        return (response.selector.xpath(query).extract_first() or '').strip()

    qrcode = response.selector.xpath(
        "//img[@class='qr-code']/@src").extract_first()
    if not qrcode:
        logger.warning(f'二维码链接解析失败:`{url}')
        return None

    # The account id is the ``username`` query parameter of the QR-code URL.
    # parse_qs() omits blank values, so look the key up defensively.
    query_args = urllib.parse.parse_qs(urllib.parse.urlparse(qrcode).query)
    name = query_args.get('username', [''])[0]
    if not name:
        logger.warning(f'微信公众号 id 解析失败:`{qrcode}')
        return None

    if not Site.objects.filter(name=name).exists():
        # New site: all three scraped fields are required.
        cname = first_text("//li[@class='title']//span[@class='name']/text()")
        avatar = first_text("//img[@class='avatar']/@src")
        brief = first_text(
            "//div[@class='Profile-sideColumnItemValue']/text()")

        if cname and avatar and brief:
            # Only store the avatar once the page data is known to be usable.
            favicon = save_avatar(avatar, name)
            try:
                site = Site(name=name, cname=cname, link=qrcode, brief=brief,
                            star=19, freq='日更', creator='wemp', copyright=20,
                            tag='公众号', rss=url, favicon=favicon)
                site.save()
            except Exception:
                logger.warning(f'新增公众号失败:`{name}', exc_info=True)
    else:
        # Existing site: refresh its content from the latest article links.
        try:
            site = Site.objects.get(name=name)
            wemp_links = response.selector.xpath(
                "//*[@class='weui_media_title']/a/@href").extract()[:3]
            wemp_spider(wemp_links, site)
        except Exception:
            logger.warning(f'更新公众号内容出现异常:`{name}', exc_info=True)

    return {"name": name}