示例#1
0
 def FetchProxies(self):
     print 'start to fetch html page'
     user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'  
     headers = { 'User-Agent' : user_agent }
     req = urllib2.Request('http://www.xicidaili.com/wn/', headers=headers)  
     response = urllib2.urlopen(req)
     html = response.read()
     #print html
     print 'start to analysis html page'
     soup = BeautifulSoup(html, 'html5lib')
     tbody = soup.find_all('tbody')
     proxylistitems = tbody[0].find_all('tr')
     proxies = []
     
     print 'start to validate proxys, count is',len(proxylistitems)
     for proxylistitem in proxylistitems:
         itemtexts = proxylistitem.find_all('td')
         if len(itemtexts) < 4:
             continue
         address = itemtexts[2].string.strip()
         port = itemtexts[3].string.strip()
         postdata = '{0}:{1}'.format(address, port)
         add_task('ValidateProxyTaskQueue', 'http://1.fetchproxy.applinzi.com/task/validateproxy', postdata)
         proxies += [postdata]
     return proxies
示例#2
0
 def broadcast(self, msg, type):
     print msg, type
     #listener_manager.broadcast(json.dumps({'msg':msg,'type':type}),self.listeners)
     from sae.taskqueue import add_task
     rkey = msg_box.gen_key()
     msg_box.set_msg(rkey, json.dumps({'type': type, 'msg': msg}))
     add_task('msg_queue', '/logboard/broadcast/', rkey)
示例#3
0
文件: views.py 项目: xkong/ninantrash
 def get(self, request, *args, **kwargs):
     """Queue one ``subscribe_q`` scan task per valid subscription.

     Each task targets the ``subscribe_scan`` URL and carries the
     subscription's site, id and keywords as its payload. The response is
     always ``{'code': 0, 'msg': 'ok'}``.
     """
     valid_subscriptions = Subscribe.objects.filter(is_valid=True)
     for sub in valid_subscriptions:
         task_payload = {
             "site": sub.site,
             "id": sub.id,
             "keywords": sub.keywords,
         }
         add_task('subscribe_q', reverse('subscribe_scan'), payload=task_payload)
     return self.render_to_response({'code': 0, 'msg': 'ok'})
示例#4
0
    def broadcast(self, msg, type):
        print msg, type
        # listener_manager.broadcast(json.dumps({'msg':msg,'type':type}),self.listeners)
        from sae.taskqueue import add_task

        rkey = msg_box.gen_key()
        msg_box.set_msg(rkey, json.dumps({"type": type, "msg": msg}))
        add_task("msg_queue", "/logboard/broadcast/", rkey)
示例#5
0
    def post(self):
        """Create an article from the submitted form and reply with JSON.

        Reads ``cat``, ``tit`` and ``con`` (required) plus ``tag``, ``clo``
        and ``password``. On success the article is linked into its
        category, archive and tags, its page-view counter is initialised,
        caches are cleared and, outside debug mode, a ping task is queued.
        The reply is ``{"status": ..., "msg": ...}`` with status 200 on
        success and 500 on any failure.
        """
        self.set_header('Content-Type', 'application/json')
        rspd = {'status': 201, 'msg': 'ok'}

        try:
            tf = {'true': 1, 'false': 0}
            timestamp = int(time())
            post_dic = {
                'category': self.get_argument("cat"),
                'title': self.get_argument("tit"),
                'content': self.get_argument("con"),
                # NOTE(review): as written this replaces ',' with ',' (a
                # no-op); presumably the first argument was meant to be a
                # full-width comma - confirm against the upstream source.
                'tags': self.get_argument("tag", '').replace(u',', ','),
                'closecomment': self.get_argument("clo", '0'),
                'password': self.get_argument("password", ''),
                'add_time': timestamp,
                'edit_time': timestamp,
                'archive': genArchive(),
            }
            if post_dic['tags']:
                # Trim, de-duplicate and drop empty entries. The result is
                # always written back so separator-only input becomes ''
                # instead of being stored and indexed as raw tags.
                tagslist = set(x.strip() for x in post_dic['tags'].split(','))
                tagslist.discard('')
                post_dic['tags'] = ','.join(tagslist)
            # KeyError for anything but 'true'/'false' (including the '0'
            # default) lands in the handler below.
            post_dic['closecomment'] = tf[post_dic['closecomment'].lower()]
        except Exception:
            rspd['status'] = 500
            rspd['msg'] = '错误: 注意必填的三项'
            self.write(json.dumps(rspd))
            return

        postid = Article.add_new_article(post_dic)
        if not postid:
            rspd['status'] = 500
            rspd['msg'] = '错误: 未知错误,请尝试重新提交'
            self.write(json.dumps(rspd))
            return

        keyname = 'pv_%s' % (str(postid))
        set_count(keyname, 0, 0)

        Category.add_postid_to_cat(post_dic['category'], str(postid))
        Archive.add_postid_to_archive(genArchive(), str(postid))
        increment('Totalblog')
        if post_dic['tags']:
            Tag.add_postid_to_tags(post_dic['tags'].split(','), str(postid))

        rspd['status'] = 200
        rspd['msg'] = '完成: 你已经成功添加了一篇文章 <a href="/t/%s" target="_blank">查看</a>' % str(postid)
        clear_cache_by_pathlist(['/', 'cat:%s' % quoted_string(post_dic['category'])])

        if not debug:
            add_task('default', '/task/pingrpctask')

        self.write(json.dumps(rspd))
        return
示例#6
0
    def post(self):
        """Create an article from the submitted form and reply with JSON.

        Required arguments are ``cat``, ``tit`` and ``con``; ``tag``,
        ``clo`` and ``password`` are optional. A created article is linked
        into its category and tags, the related caches are cleared and,
        outside debug mode, a ping task is queued. The reply is a JSON
        object with ``status`` (200 or 500) and ``msg``.
        """
        self.set_header('Content-Type', 'application/json')
        rspd = {'status': 201, 'msg': 'ok'}

        try:
            tf = {'true': 1, 'false': 0}
            timestamp = int(time())
            post_dic = {
                'category': self.get_argument("cat"),
                'title': self.get_argument("tit"),
                'content': self.get_argument("con"),
                # NOTE(review): this replace is a no-op as written (',' to
                # ','); presumably a full-width comma was intended - confirm.
                'tags': self.get_argument("tag", '').replace(u',', ','),
                'closecomment': self.get_argument("clo", '0'),
                'password': self.get_argument("password", ''),
                'add_time': timestamp,
                'edit_time': timestamp,
            }
            if post_dic['tags']:
                # Always store the cleaned list so input made only of
                # separators/whitespace ends up as '' rather than raw text.
                tagslist = set(x.strip() for x in post_dic['tags'].split(','))
                tagslist.discard('')
                post_dic['tags'] = ','.join(tagslist)
            # Unknown values (KeyError) are reported by the handler below.
            post_dic['closecomment'] = tf[post_dic['closecomment'].lower()]
        except Exception:
            rspd['status'] = 500
            rspd['msg'] = '错误: 注意必填的三项'
            self.write(json.dumps(rspd))
            return

        postid = Article.add_new_article(post_dic)
        if not postid:
            rspd['status'] = 500
            rspd['msg'] = '错误: 未知错误,请尝试重新提交'
            self.write(json.dumps(rspd))
            return

        Category.add_postid_to_cat(post_dic['category'], str(postid))
        if post_dic['tags']:
            Tag.add_postid_to_tags(post_dic['tags'].split(','), str(postid))

        rspd['status'] = 200
        rspd['msg'] = '完成: 你已经成功添加了一篇文章 <a href="/t/%s" target="_blank">查看</a>' % str(postid)
        clear_cache_by_pathlist(
            ['/', 'cat:%s' % quoted_string(post_dic['category'])])

        if not debug:
            add_task('default', '/task/pingrpctask')

        self.write(json.dumps(rspd))
        return
示例#7
0
 def get(self, request, *args, **kwargs):
     """Queue a ``fish_check4water`` task for every user that has a fish.

     The queue name comes from ``settings.FEEDFISH_TASKQUEUE`` and falls
     back to ``'fish'``. Always responds with ``{'code': 0, 'msg': 'ok'}``.
     """
     for user in User.objects.all():
         if has_fish(user):
             check_url = reverse('fish_check4water',
                                 kwargs={'user': user.username})
             queue_name = getattr(settings, 'FEEDFISH_TASKQUEUE', 'fish')
             add_task(queue_name, check_url)
     return self.render_to_response({'code': 0, 'msg': 'ok'})
示例#8
0
 def get(self, request, *args, **kwargs):
     """Fan out one water-check task per fish owner.

     Users for which ``has_fish`` is false are skipped. Each remaining user
     gets a task on the ``FEEDFISH_TASKQUEUE`` queue (default ``'fish'``)
     pointing at that user's ``fish_check4water`` URL.
     """
     # Lazy filter: has_fish() still runs right before each user is handled.
     fish_owners = (u for u in User.objects.all() if has_fish(u))
     for owner in fish_owners:
         url_kwargs = {'user': owner.username}
         task_url = reverse('fish_check4water', kwargs=url_kwargs)
         add_task(getattr(settings, 'FEEDFISH_TASKQUEUE', 'fish'), task_url)
     return self.render_to_response({'code': 0, 'msg': 'ok'})
示例#9
0
 def get(self, request, *args, **kwargs):
     """Schedule a scan task for each subscription flagged ``is_valid``.

     The task payload holds the subscription's ``site``, ``id`` and
     ``keywords``; the task URL is resolved from ``subscribe_scan``.
     """
     for entry in Subscribe.objects.filter(is_valid=True):
         scan_args = {"site": entry.site, "id": entry.id, "keywords": entry.keywords}
         scan_url = reverse('subscribe_scan')
         add_task('subscribe_q', scan_url, payload=scan_args)
     response_body = {'code': 0, 'msg': 'ok'}
     return self.render_to_response(response_body)
示例#10
0
def on_exportlog_saved(sender, instance, created, **kwargs):
    if not created:
        return

    if 'SERVER_SOFTWARE' in os.environ:
        from sae.taskqueue import add_task
        add_task('export', '/v1/task/export/', payload='eid=%d' % instance.id)
        return

    print "local env: do real export task"
示例#11
0
 def QueryAllProxy(self):
     """Queue removal-check tasks for all stored proxies, ten rows per task.

     Reads every row of ``app_proxys``, converts each row with ``getres``
     and posts the JSON-encoded batches to ``ValidateProxyTaskQueue``.

     Returns:
         int: number of rows read from ``app_proxys``.
     """
     conn = MySQLdb.connect(host=sae.const.MYSQL_HOST, user=sae.const.MYSQL_USER, passwd=sae.const.MYSQL_PASS, db=sae.const.MYSQL_DB, port=int(sae.const.MYSQL_PORT), charset="utf8")
     try:
         cursor = conn.cursor()
         try:
             cursor.execute("select * from app_proxys")
             queryret = cursor.fetchall()
         finally:
             cursor.close()
     finally:
         # All rows are already fetched, so the connection is released
         # before any task is queued (it used to be leaked).
         conn.close()

     for i in range(0, len(queryret), 10):
         # Build a real list so the batch is JSON-serializable.
         batch = [getres(row) for row in queryret[i:i + 10]]
         add_task('ValidateProxyTaskQueue', 'http://1.fetchproxy.applinzi.com/task/removeproxy', json.dumps(batch))
     return len(queryret)
示例#12
0
def trigger(queue_name, url_name, payload={}, *args, **kwargs):
    """Queue a task for the view named ``url_name`` on a random chat queue.

    ``queue_name`` is accepted but not used: the target queue is always
    ``'chat'`` followed by a random digit 0-9. A dict ``payload`` is
    URL-encoded; any other payload is passed through unchanged. Remaining
    positional and keyword arguments are forwarded to ``add_task``.
    """
    # A dict can never also be a string, so one type check is enough.
    # The shared default dict is only read here, never mutated.
    if isinstance(payload, dict):
        payload = urlencode(payload)

    target_queue = 'chat' + str(randint(0, 9))
    task_url = reverse(url_name)
    add_task(target_queue, task_url, payload, *args, **kwargs)
示例#13
0
    def post(self):
        """Create an article from the submitted form and reply with JSON.

        ``cat``, ``tit`` and ``con`` are required; ``tag``, ``clo`` and
        ``password`` are optional. After a successful insert the article is
        attached to its category and tags, caches are cleared and a ping
        task is queued unless ``debug`` is set. The JSON reply carries
        ``status`` (200 or 500) and ``msg``.
        """
        self.set_header("Content-Type", "application/json")
        rspd = {"status": 201, "msg": "ok"}

        try:
            tf = {"true": 1, "false": 0}
            timestamp = int(time())
            post_dic = {
                "category": self.get_argument("cat"),
                "title": self.get_argument("tit"),
                "content": self.get_argument("con"),
                # NOTE(review): replaces "," with "," (no-op) as written;
                # presumably meant to map a full-width comma - confirm.
                "tags": self.get_argument("tag", "").replace(u",", ","),
                "closecomment": self.get_argument("clo", "0"),
                "password": self.get_argument("password", ""),
                "add_time": timestamp,
                "edit_time": timestamp,
            }
            if post_dic["tags"]:
                # Write the cleaned list back unconditionally: input that
                # contains only separators or blanks must become "".
                tagslist = set(x.strip() for x in post_dic["tags"].split(","))
                tagslist.discard("")
                post_dic["tags"] = ",".join(tagslist)
            # A value outside "true"/"false" raises KeyError, handled below.
            post_dic["closecomment"] = tf[post_dic["closecomment"].lower()]
        except Exception:
            rspd["status"] = 500
            rspd["msg"] = "错误: 注意必填的三项"
            self.write(json.dumps(rspd))
            return

        postid = Article.add_new_article(post_dic)
        if not postid:
            rspd["status"] = 500
            rspd["msg"] = "错误: 未知错误,请尝试重新提交"
            self.write(json.dumps(rspd))
            return

        Category.add_postid_to_cat(post_dic["category"], str(postid))
        if post_dic["tags"]:
            Tag.add_postid_to_tags(post_dic["tags"].split(","), str(postid))

        rspd["status"] = 200
        rspd["msg"] = '完成: 你已经成功添加了一篇文章 <a href="/t/%s" target="_blank">查看</a>' % str(postid)
        clear_cache_by_pathlist(["/", "cat:%s" % quoted_string(post_dic["category"])])

        if not debug:
            add_task("default", "/task/pingrpctask")

        self.write(json.dumps(rspd))
        return
示例#14
0
def on_exportlog_saved(sender, instance, created, **kwargs):
    if not created:
        return

    if 'SERVER_SOFTWARE' in os.environ:
        from sae.taskqueue import add_task
        add_task(
            'export', 
            '/v1/task/export/', 
            payload='eid=%d' % instance.id)
        return

    print "local env: do real export task"
示例#15
0
    def on_url(self, msg):
        """Persist an incoming URL message and queue a fetch job for it.

        When the insert yields a positive id, a ``FetchJobQueue`` task with
        payload ``"<id>-<msg.content>"`` is queued and a thank-you reply is
        produced; otherwise the reply reports a database failure.

        Returns:
            tuple: ``(0, reply)`` where ``reply`` comes from
            ``self.msg_builder.to_text``.
        """
        # The second element of the insert result is not used here.
        row_id, _ = dba.msg_text_insert(msg)
        if row_id > 0:
            reply_title = u"谢谢投递!"
            job_payload = u"{}-{}".format(str(row_id), msg.content)
            add_task("FetchJobQueue", "/task/fetch", job_payload)
        else:
            reply_title = u"数据库操作不幸失败鸟!"
        replies = [{"title": reply_title, "content": ""}]
        return 0, self.msg_builder.to_text(replies, msg.from_user, msg.to_user, int(time.time()))
示例#16
0
文件: post.py 项目: cash2one/cms4p
    def post(self):
        """Create or update a post from the submitted form; reply with JSON.

        ``act`` selects the operation: ``add`` creates the post, ``edit``
        updates the post identified by ``post_id``. Afterwards the front
        page cache is cleared and, outside debug mode, a ping task is
        queued. The reply is a JSON object with ``status`` and ``msg``.
        """
        # Declare the content type first so the error reply is labelled as
        # JSON too (it used to be set on the success path only).
        self.set_header("Content-Type", "application/json")
        rspd = {"status": 200, "msg": "OK"}
        try:
            tf = {'true': 1, 'false': 0}
            act = self.get_argument("act", '').encode('utf-8')
            post_dic = {
                'category_id': self.get_argument("category_id", '-').encode('utf-8'),
                'user_id': self.get_secure_cookie("user_id"),
                'title': self.get_argument("title").encode('utf-8'),
                'digest': '-',
                'content': self.get_argument("content").encode('utf-8'),
                'image_url': '-',
                'tags': ','.join(self.get_arguments("tag")),
                # Anything but 'true'/'false' raises KeyError, handled below.
                'allow_comment': tf[self.get_argument("clo", 'false')],
                'top': tf[self.get_argument("top", 'false')],
                'password': self.get_argument("password", '').encode('utf-8'),
                'salt': '-',
            }
        except Exception:
            rspd['status'] = 500
            rspd['msg'] = "用户名、邮箱均为必填项!"
            self.write(json.dumps(rspd))
            return

        if act == 'add':
            Posts.create(post_dic)
        elif act == 'edit':
            # NOTE(review): a missing or non-numeric post_id raises
            # ValueError here and is not turned into a JSON error.
            post_dic['post_id'] = int(self.get_argument("post_id", ""))
            Posts.update(post_dic)

        clear_cache_by_pathlist(['/'])

        if not debug:
            add_task('default', '/task/pingrpctask')

        rspd['msg'] = "成功保存文章!"
        self.write(json.dumps(rspd))
示例#17
0
 def get(self, request, *args, **kwargs):
     """
     Called from crontab.

     Queues one ``fish_check4feed`` task per user that has a fish.
     """
     # 'time' says which run of the day this is; per the original note it
     # must be one of 0, 1 or 2 (three runs a day). Defaults to 0.
     time_called = kwargs.setdefault('time', 0)
     for user in User.objects.all():
         if has_fish(user):
             feed_url = reverse(
                 'fish_check4feed',
                 kwargs={'time': time_called, 'user': user.username})
             queue_name = getattr(settings, 'FEEDFISH_TASKQUEUE', 'fish')
             add_task(queue_name, feed_url)
     return self.render_to_response({'code': 0, 'msg': 'ok'})
示例#18
0
 def get(self, request, *args, **kwargs):
     """
     Called from crontab.

     For every user with a fish, a feed-check task is queued on the
     ``FEEDFISH_TASKQUEUE`` queue (``'fish'`` when the setting is absent).
     """
     # Which of the day's runs this is (0, 1 or 2 per the original note).
     time_called = kwargs.setdefault('time', 0)
     # Lazy filter keeps has_fish() interleaved with the queuing.
     fish_owners = (u for u in User.objects.all() if has_fish(u))
     for owner in fish_owners:
         url_kwargs = {'time': time_called, 'user': owner.username}
         task_url = reverse('fish_check4feed', kwargs=url_kwargs)
         add_task(getattr(settings, 'FEEDFISH_TASKQUEUE', 'fish'), task_url)
     return self.render_to_response({'code': 0, 'msg': 'ok'})
示例#19
0
    def get(self, request, *args, **kwargs):
        """Queue an image-upload task for each image URL in the article.

        Image URLs are the values captured by ``' src="(.*?)" '`` in the
        object's ``content``. The file name sent with each task is the part
        of the URL path after the last ``STORAGE_BUCKET_NAME`` occurrence.
        Every task gets a random delay below 298. The response lists the
        URLs that were found.
        """
        instance = self.get_object()
        default_bucket = settings.STORAGE_BUCKET_NAME

        image_urls = re.findall(r' src="(.*?)" ', instance.content)
        for url in image_urls:
            image_file_name = urlparse(url).path.split(default_bucket)[-1]
            task_link = reverse('weixinmp.image_upload', args=(instance.pk, ))
            payload = urlencode({'name': image_file_name, 'raw_url': url})
            add_task('weixin', task_link, payload=payload,
                     delay=random.randrange(298))
        return self.render_to_response({'msg': image_urls})
示例#20
0
    def get(self, request, *args, **kwargs):
        """Collect the article's image URLs and schedule their upload.

        One ``weixin`` task per URL is queued against the
        ``weixinmp.image_upload`` view of this object, with a URL-encoded
        payload of ``name`` (path suffix after the storage bucket name) and
        ``raw_url``, delayed by a random amount below 298.
        """
        article = self.get_object()
        bucket_name = getattr(settings, 'STORAGE_BUCKET_NAME')

        html = article.content
        src_pattern = r' src="(.*?)" '
        found_urls = re.findall(src_pattern, html)
        for raw_url in found_urls:
            url_path = urlparse(raw_url).path
            file_name = url_path.split(bucket_name)[-1]
            upload_url = reverse('weixinmp.image_upload', args=(article.pk,))
            form = {'name': file_name,
                    'raw_url': raw_url}
            encoded_form = urlencode(form)
            wait = random.randrange(298)
            add_task('weixin', upload_url, payload=encoded_form, delay=wait)
        return self.render_to_response({'msg': found_urls})
示例#21
0
    def save(self,  *args,  **kwargs):
        """Fill in defaults, save, then queue sync tasks and a timeline event.

        A missing digest is generated and a missing cover falls back to the
        ``WEIXIN_DEFAULT_COVER`` / ``WEIXIN_DEFAULT_COVER_ID`` settings. For
        a new object with ``sync`` set, upload (delay 300) and image
        collection (delay 180) tasks are queued. Every new object also gets
        an ``Event`` row.
        """
        is_new = not self.id
        if not self.digest:
            self.digest = self.get_digest()
        if not self.cover_img:
            self.cover_img = settings.WEIXIN_DEFAULT_COVER
            self.fileid = settings.WEIXIN_DEFAULT_COVER_ID
        super(WeixinMp, self).save(*args, **kwargs)

        if self.sync and is_new:
            from sae.taskqueue import add_task
            follow_ups = (('weixinmp.upload', 300),
                          ('weixinmp.image_collect', 180))
            for view_name, wait in follow_ups:
                add_task('weixin', reverse(view_name, args=(self.pk,)), delay=wait)

        if is_new:
            Event(user=self.user, content_object=self).save()
示例#22
0
    def save(self, *args, **kwargs):
        """Save the object and run the one-off steps for new objects.

        Before saving, an empty ``digest`` is filled from ``get_digest()``
        and an empty ``cover_img`` from the default cover settings. After
        saving, a new object with ``sync`` set gets two ``weixin`` tasks
        (upload after 300, image collection after 180), and any new object
        is recorded as an ``Event``.
        """
        was_unsaved = not self.id
        if not self.digest:
            self.digest = self.get_digest()
        if not self.cover_img:
            self.cover_img = getattr(settings, 'WEIXIN_DEFAULT_COVER')
            self.fileid = getattr(settings, 'WEIXIN_DEFAULT_COVER_ID')
        super(WeixinMp, self).save(*args, **kwargs)

        if self.sync and was_unsaved:
            from sae.taskqueue import add_task
            upload_url = reverse('weixinmp.upload', args=(self.pk, ))
            add_task('weixin', upload_url, delay=300)
            collect_url = reverse('weixinmp.image_collect', args=(self.pk, ))
            add_task('weixin', collect_url, delay=180)

        if was_unsaved:
            timeline_event = Event(user=self.user, content_object=self)
            timeline_event.save()
示例#23
0
    def save(self,  *args,  **kwargs):
        """Normalize ``meta_link``, save the note and trigger follow-up work.

        Non-word characters in ``meta_link`` are replaced by ``-`` before
        saving. After the save, public notes are pinged back when
        ``settings.DEBUG`` is off, a timeline ``Event`` is recorded for a
        newly created note, and a search-index update task is queued on
        every save.
        """
        self.meta_link = re.sub(r'\W', '-', self.meta_link)
        created = not self.id

        super(Note, self).save(*args, **kwargs)
        # Pingback is called directly since 2014/04/02 (the signal was
        # removed).
        if not self.is_private and not settings.DEBUG:
            try:
                bd_pingback.pingback(self)
            except Exception:
                # Best-effort: a failed ping must not fail the save, but the
                # failure is logged instead of being silently discarded.
                import logging
                logging.getLogger(__name__).exception(
                    'pingback failed for note %s', self.pk)

        # Add timeline update.
        if created:
            event = Event(user=self.user, content_object=self)
            event.save()

        # Add task to taskqueue for search indexes update.
        from sae.taskqueue import add_task
        add_task('task1', '/backends/updateindex/')
示例#24
0
文件: post.py 项目: dreambt/cms4p
    def post(self):
        """Save a post (``act`` = ``add`` or ``edit``) and reply with JSON.

        The form fields are collected into ``post_dic``; a missing required
        field or an unknown boolean value yields a status-500 reply. On
        success the front page cache is cleared and a ping task is queued
        unless ``debug`` is set.
        """
        # The header is set before any reply is written so that the error
        # branch is served as JSON as well.
        self.set_header("Content-Type", "application/json")
        rspd = {"status": 200, "msg": "OK"}
        try:
            tf = {'true': 1, 'false': 0}
            act = self.get_argument("act", '').encode('utf-8')
            post_dic = {
                'category_id': self.get_argument("category_id", '-').encode('utf-8'),
                'user_id': self.get_secure_cookie("user_id"),
                'title': self.get_argument("title").encode('utf-8'),
                'digest': '-',
                'content': self.get_argument("content").encode('utf-8'),
                'image_url': '-',
                'tags': ','.join(self.get_arguments("tag")),
                'allow_comment': tf[self.get_argument("clo", 'false')],
                'top': tf[self.get_argument("top", 'false')],
                'password': self.get_argument("password", '').encode('utf-8'),
                'salt': '-',
            }
        except Exception:
            rspd['status'] = 500
            rspd['msg'] = "用户名、邮箱均为必填项!"
            self.write(json.dumps(rspd))
            return

        if act == 'add':
            Posts.create(post_dic)
        elif act == 'edit':
            # NOTE(review): int() raises ValueError when post_id is absent
            # or not numeric; that case is not reported as JSON.
            post_dic['post_id'] = int(self.get_argument("post_id", ""))
            Posts.update(post_dic)

        clear_cache_by_pathlist(['/'])

        if not debug:
            add_task('default', '/task/pingrpctask')

        rspd['msg'] = "成功保存文章!"
        self.write(json.dumps(rspd))
示例#25
0
    def POST(self):
        """Store a posted news item and queue a notice task for it.

        The request body is a pickled dict. Missing ``content``, ``title``
        and ``pubDate`` entries are filled in by parsing ``news['link']``
        or sniffing a date from the content. A notice task is queued and
        the item is inserted into the table named by ``news['tbln']``.

        Raises:
            Exception: any processing error is re-raised; in a local
            environment the traceback is printed and the process exits.
        """
        # SECURITY(review): unpickling request data lets the sender execute
        # arbitrary code; restrict this endpoint to trusted callers or
        # switch to a data-only format such as JSON.
        news = pickle.loads(web.data())
        try:
            # The page is parsed lazily. It used to be unbound whenever the
            # item already carried 'content', which made the title-prefix
            # lookup below raise.
            page = None
            if 'content' not in news:
                page = PageContentParser(news['link'])
                news['content'] = page.getMainContent()
            if not news.get('title', None):
                if page is None:
                    page = PageContentParser(news['link'])
                news['title'] = page.getTitle()
            # No parsed page (or no prefix method) means an empty prefix.
            news['title'] = getattr(page, 'getTitlePrefix', lambda: '')() + news['title']
            if 'pubDate' not in news:
                content = getattr(news['content'], 'get_text', lambda: news['content'])()
                news['pubDate'] = dateSniffer(content) or datetime.today()

            # Added on Jan 29, 2013: publish school notices to howareyou.
            notice = {'title': news['title'], 'date': news['pubDate'], 'link': news['link'], 'dept': news['tbln']}
            add_task('web_content_fetcher', '/notice', str(notice))

            self.db.insert(news.pop('tbln'), **news)
        except Exception:
            if const.isLocal:
                traceback.print_exc()
                os._exit(1)
            raise
示例#26
0
def add_event_task(event, data):
    """Queue a task for ``event`` on the ``queue`` task queue.

    The task URI is ``/task/<event>/`` and the payload is ``data`` encoded
    as JSON. A debug line is logged first.

    Returns:
        Whatever ``add_task`` returns.
    """
    uri = '/task/{0}/'.format(event)
    message = 'add %s task %s' % (event, data)
    logging.debug(message)
    payload = json.dumps(data)
    return add_task('queue', uri, payload)
示例#27
0
文件: admin.py 项目: yobin/saepy-log
 def get(self):
     """Queue one ping task per XML-RPC endpoint, then write the time.

     Task URLs are ``<BASE_URL>/task/pingrpc/<index>`` for each index of
     ``XML_RPC_ENDPOINTS``; all go to the ``default`` queue.
     """
     for n, _ in enumerate(XML_RPC_ENDPOINTS):
         add_task('default', '%s/task/pingrpc/%d' % (BASE_URL, n))
     self.write(str(time()))
示例#28
0
文件: admin.py 项目: yobin/saepy-log
    def post(self):
        """Create an article from the submitted form and reply with JSON.

        ``cat``, ``tit`` and ``con`` are required; ``tag``, ``clo`` and
        ``password`` are optional. On success the article is linked into
        its category, archive and tags, its page-view counter is
        initialised, every cache is cleared and, outside debug mode, a ping
        task is queued. The reply carries ``status`` (200 or 500) and
        ``msg``.
        """
        self.set_header('Content-Type', 'application/json')
        rspd = {'status': 201, 'msg': 'ok'}

        try:
            tf = {'true': 1, 'false': 0}
            timestamp = int(time())
            content = self.get_argument("con")
            if getAttr('MARKDOWN'):
                # Markdown source is stored as UTF-8 bytes; rendering it
                # here was disabled in the original code.
                content = content.encode("utf-8")
            post_dic = {
                'category': self.get_argument("cat"),
                'title': self.get_argument("tit"),
                'content': content,
                # NOTE(review): replaces ',' with ',' (no-op) as written;
                # presumably a full-width comma was intended - confirm.
                'tags': self.get_argument("tag", '').replace(u',', ','),
                'closecomment': self.get_argument("clo", '0'),
                'password': self.get_argument("password", ''),
                'add_time': timestamp,
                'edit_time': timestamp,
                'archive': genArchive(),
            }
            if MYSQL_TO_KVDB_SUPPORT:
                post_dic['comment_num'] = '0'

            if post_dic['tags']:
                # Trim, de-duplicate and drop empty entries; always write
                # the result back so separator-only input becomes ''.
                tagslist = set(x.strip() for x in post_dic['tags'].split(','))
                tagslist.discard('')
                post_dic['tags'] = ','.join(tagslist)
            # Values other than 'true'/'false' raise KeyError, handled below.
            post_dic['closecomment'] = tf[post_dic['closecomment'].lower()]
        except Exception:
            rspd['status'] = 500
            rspd['msg'] = '错误: 注意必填的三项'
            self.write(json.dumps(rspd))
            return

        postid = Article.add_new_article(post_dic)
        if not postid:
            rspd['status'] = 500
            rspd['msg'] = '错误: 未知错误,请尝试重新提交'
            self.write(json.dumps(rspd))
            return

        keyname = 'pv_%s' % (str(postid))
        set_count(keyname, 0, 0)

        Category.add_postid_to_cat(post_dic['category'], str(postid))
        Archive.add_postid_to_archive(genArchive(), str(postid))
        increment('Totalblog')
        if post_dic['tags']:
            Tag.add_postid_to_tags(post_dic['tags'].split(','), str(postid))

        rspd['status'] = 200
        rspd['msg'] = '完成: 你已经成功添加了一篇文章 <a href="/t/%s" target="_blank">查看</a>' % str(postid)
        # The whole cache is cleared rather than selected paths
        # (change marked "yobin 20160921" in the original).
        clear_all_cache()

        if not debug:
            add_task('default', '/task/pingrpctask')

        self.write(json.dumps(rspd))
        return
示例#29
0
文件: utils.py 项目: cnspica/ifwechat
def add_event_task(event, data):
    """Send ``data`` as a JSON task payload to ``/task/<event>/``.

    The task goes to the queue named ``queue``; the call is logged at
    debug level and the result of ``add_task`` is returned unchanged.
    """
    target = '/task/{event}/'.format(event=event)
    logging.debug('add %s task %s' % (event, data))
    body = json.dumps(data)
    return add_task('queue', target, body)
示例#30
0
 def get(self):
     """Fan out ping tasks, one per entry of ``XML_RPC_ENDPOINTS``.

     Each task is queued on ``default`` with the endpoint's index appended
     to ``<BASE_URL>/task/pingrpc/``. The current time is written as the
     response body.
     """
     endpoint_count = len(XML_RPC_ENDPOINTS)
     n = 0
     while n < endpoint_count:
         task_url = '%s/task/pingrpc/%d' % (BASE_URL, n)
         add_task('default', task_url)
         n += 1
     self.write(str(time()))
示例#31
0
    def get(self, page=None):
        """Scrape a jandan.net picture page and store pictures not seen yet.

        Args:
            page: page number to fetch. ``None`` fetches the newest page
                and maintains the cached ``jandan_current_page`` marker;
                when that marker has advanced, a task for the previous page
                is queued.

        For every new picture a row is inserted into ``pics`` and a
        ``download_image`` task is queued. The number of new pictures is
        written as the response. On the very first run (no cached marker)
        only the marker is stored and nothing else is done.
        """
        if page is not None:
            url = "http://jandan.net/pic/page-%s" % page
        else:
            url = "http://jandan.net/pic"

        req = urllib2.Request(url)
        req.add_header(
            "User-Agent",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.47 Safari/536.11",
        )
        response = urllib2.urlopen(req)
        try:
            the_page = response.read()
        finally:
            # Release the socket even when reading the body fails.
            response.close()

        soup = BeautifulSoup(the_page)

        if page is None:
            current_page = soup.find("span", {"class": "current-comment-page"})
            # The slice drops the first and last character around the number.
            current_page = current_page.text[1:-1]

            if current_page:
                current_page = int(current_page)

            # No cached page number yet: only remember the current page.
            # (Crawling every earlier page was disabled in the original.)
            if not self.mc.get("jandan_current_page"):
                self.mc.set("jandan_current_page", current_page)
                return
            # The site moved on to a new page: also fetch the previous one.
            elif int(self.mc.get("jandan_current_page")) < current_page:
                add_task("fetch", "/fetch_jandan/%s" % (current_page - 1))

            self.mc.set("jandan_current_page", current_page)
        else:
            current_page = page

        pics = []
        for comment in soup.findAll("li", attrs={"id": re.compile("^comment")}):
            images = comment.findAll("img")

            # The second image is used as the picture; skip shorter items.
            if len(images) < 2:
                continue

            pic = {}

            unique_id = comment["id"]
            pic["unique_id"] = hashlib.sha1(unique_id).hexdigest()
            pic["url"] = images[1]["src"]

            text = []
            for p in comment.findAll("p"):
                # Remove images so only the paragraph text is kept.
                for img in p.findAll("img"):
                    img.extract()

                text.append(p.text)

            pic["desc"] = "".join(text)
            pic["add_time"] = time.time()
            pic["source_url"] = "http://jandan.net/pic/page-%s#%s" % (current_page, unique_id)

            # Values are passed as query parameters so the driver escapes
            # them. NOTE(review): assumes self.db takes torndb-style
            # ``get(query, *parameters)`` - confirm.
            old_pic = self.db.get("SELECT * FROM pics WHERE unique_id=%s", pic["unique_id"])

            # Skip pictures that are already stored.
            if old_pic:
                continue

            pics.append(pic)

        pics.reverse()

        # Scraped text may contain quotes, so it must never be formatted
        # into the SQL string itself.
        insert_sql = (
            "INSERT INTO pics (unique_id, url, width, height, source_url, `from`, `desc`, add_time) "
            "VALUES (%s, %s, '0', '0', %s, 'jandan', %s, %s)"
        )
        for pic in pics:
            row_id = self.db.execute(
                insert_sql,
                pic["unique_id"],
                pic["url"],
                pic["source_url"],
                pic["desc"],
                # Same textual form the formatted SQL used to contain.
                "%s" % pic["add_time"],
            )
            if row_id:
                # NOTE(review): the url is not URL-encoded in this payload.
                payload = "id=%(id)s&url=%(url)s" % {"id": row_id, "url": pic["url"]}
                add_task("download_image", "/download_image", payload)

        self.write("fetched %s images" % len(pics))
示例#32
0
文件: admin.py 项目: yobin/saepy-log
    def post(self):
        """Create an article from the submitted form and reply with JSON.

        Required form fields are ``cat``, ``tit`` and ``con``; ``tag``,
        ``clo`` and ``password`` are optional. A created article gets a
        page-view counter and is linked into its category, archive and
        tags; then all caches are cleared and a ping task is queued unless
        ``debug`` is set. The JSON reply holds ``status`` (200 or 500) and
        ``msg``.
        """
        self.set_header("Content-Type", "application/json")
        rspd = {"status": 201, "msg": "ok"}

        try:
            tf = {"true": 1, "false": 0}
            timestamp = int(time())
            content = self.get_argument("con")
            if getAttr("MARKDOWN"):
                # Markdown source is kept as UTF-8 bytes; rendering at this
                # point was disabled in the original code.
                content = content.encode("utf-8")
            post_dic = {
                "category": self.get_argument("cat"),
                "title": self.get_argument("tit"),
                "content": content,
                # NOTE(review): this replace is a no-op as written ("," to
                # ","); presumably a full-width comma was meant - confirm.
                "tags": self.get_argument("tag", "").replace(u",", ","),
                "closecomment": self.get_argument("clo", "0"),
                "password": self.get_argument("password", ""),
                "add_time": timestamp,
                "edit_time": timestamp,
                "archive": genArchive(),
            }
            if MYSQL_TO_KVDB_SUPPORT:
                post_dic["comment_num"] = "0"

            if post_dic["tags"]:
                # The cleaned list is stored unconditionally so input made
                # only of separators/whitespace ends up as "".
                tagslist = set(x.strip() for x in post_dic["tags"].split(","))
                tagslist.discard("")
                post_dic["tags"] = ",".join(tagslist)
            # KeyError for unexpected values is handled below.
            post_dic["closecomment"] = tf[post_dic["closecomment"].lower()]
        except Exception:
            rspd["status"] = 500
            rspd["msg"] = "错误: 注意必填的三项"
            self.write(json.dumps(rspd))
            return

        postid = Article.add_new_article(post_dic)
        if not postid:
            rspd["status"] = 500
            rspd["msg"] = "错误: 未知错误,请尝试重新提交"
            self.write(json.dumps(rspd))
            return

        keyname = "pv_%s" % (str(postid))
        set_count(keyname, 0, 0)

        Category.add_postid_to_cat(post_dic["category"], str(postid))
        Archive.add_postid_to_archive(genArchive(), str(postid))
        increment("Totalblog")
        if post_dic["tags"]:
            Tag.add_postid_to_tags(post_dic["tags"].split(","), str(postid))

        rspd["status"] = 200
        rspd["msg"] = '完成: 你已经成功添加了一篇文章 <a href="/t/%s" target="_blank">查看</a>' % str(postid)
        # All caches are dropped instead of selected paths
        # (change marked "yobin 20160921" in the original).
        clear_all_cache()

        if not debug:
            add_task("default", "/task/pingrpctask")

        self.write(json.dumps(rspd))
        return