Example #1
 def run(self):
     try:
         urls_article = [
             'https://mp.weixin.qq.com/s?src=11&timestamp=1541559601&ver=1229&signature=ixTsG-RvK8H58t6D-CpW6olWI8hA52Wz-FRb12ZcrNG-lxR20YutoyLYUr-RB3w8WHjE1petjDcbbxZVxTChvPWM27qszWu0Z3zonjx8SEQB5mmgm1O9Eu*5qsFhnBCH&new=1'
         ]
         entity = None
         backpack_list = []
         ftp_list = []
         ftp_info = None
         for page_count, url in enumerate(urls_article):
             # if page_count < 15:
             #     continue
             html = requests.get(url)
             # Determine the account info
             name = pq(html.text)('#js_name').text()
             account_name = pq(html.text)('.profile_meta_value').eq(0).text()
             log('---{}---{}---'.format(name, account_name))
             account = Account()
             account.name = name
             account.account = account_name
             account.get_account_id()
             article = Article()
             try:
                 article.create(url, account)
             except RuntimeError as run_error:
                 log('找不到浏览器 {}'.format(run_error))
             log('第{}条 文章标题: {}'.format(page_count, article.title))
             log("当前文章url: {}".format(url))
             entity = JsonEntity(article, account)
             log('当前文章ID: {}'.format(entity.id))
             # if entity.id in ids:
             #     log('当前文章已存在,跳过')
             #     continue
             backpack = Backpack()
             backpack.create(entity)
             backpack_list.append(backpack.create_backpack())
             # self.save_to_mysql(entity)
             self.save_to_mongo(entity.to_dict())
             # Build the FTP package
             ftp_info = Ftp(entity)
             name_xml = ftp_info.hash_md5(ftp_info.url)
             log('当前文章xml: {}'.format(name_xml))
             self.create_xml(ftp_info.ftp_dict(), name_xml)
             ftp_list.append(name_xml)
             # if page_count >= 3:
             #     break
         log("发包")
         # TODO: sending times out; adjust the MTU
         if ftp_info is not None:
             entity.uploads_ftp(ftp_info, ftp_list)
         if entity:
             # entity.uploads(backpack_list)
             entity.uploads_datacenter_relay(backpack_list)
             entity.uploads_datacenter_unity(backpack_list)
         log("发包完成")
     except Exception as e:
         log("解析公众号错误 {}".format(e))
         if 'chrome not reachable' in str(e):
             raise RuntimeError('chrome not reachable')
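Example #1 reads the account's display name and raw account id straight from the article page with requests and pyquery before building the Account object. Below is a minimal standalone sketch of that extraction step; it assumes the same CSS selectors (#js_name, .profile_meta_value) used in the snippet and is not the project's own helper.

    import requests
    from pyquery import PyQuery as pq

    def fetch_account_info(article_url):
        """Fetch a WeChat article page and return the display name and the
        raw account id, using the selectors from Example #1 (a sketch)."""
        resp = requests.get(article_url, timeout=30)
        doc = pq(resp.text)
        name = doc('#js_name').text()
        account_name = doc('.profile_meta_value').eq(0).text()
        return name, account_name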
Example #2
    def run(self):
        # self.set_name()
        # while True:
        account_list = ['有看投',]
        entity = None
        backpack_list = []
        for name in account_list:
            self.name = name
            html_account = self.account_homepage()
            if html_account:
                html, account_of_homepage = html_account
            else:
                continue
            log('start 公众号: ', self.name)
            urls_article = self.urls_article(html)

            account = Account()
            account.name = self.name
            account.account = account_of_homepage
            account.get_account_id()
            # account.account_id = 126774646

            for page_count, url in enumerate(urls_article):
                # if page_count < 35:
                #     continue
                article = Article()
                article.create(url, account)
                log('文章标题:', article.title)
                log("第{}条".format(page_count))

                entity = JsonEntity(article, account)
                backpack = Backpack()
                backpack.create(entity)
                backpack_list.append(backpack.create_backpack())
                import pymongo
                conn = pymongo.MongoClient('mongo')
                # Upload to the database
                sql = '''
                        INSERT INTO
                            account_http(article_url, addon, account, account_id, author, id, title)
                        VALUES
                            (%s, %s, %s, %s, %s, %s, %s)
                '''
                _tuple = (
                    article.url, datetime.datetime.now(), entity.account, entity.account_id, entity.author, entity.id,
                    entity.title
                )
                uploads_mysql(config_mysql, sql, _tuple)
                if page_count == 4:
                    break

        log("发包")
        if entity:
            # entity.uploads(backpack_list)
            # entity.uploads_datacenter_relay(backpack_list)
            entity.uploads_datacenter_unity(backpack_list)
            print('end')
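Examples #2 and #9 insert one row per article through uploads_mysql(config_mysql, sql, _tuple), whose implementation is not shown. A minimal sketch of what such a helper could look like with PyMySQL, reusing the same parameterized SQL; config_mysql is assumed to be a dict of pymysql.connect() keyword arguments.

    import pymysql

    def uploads_mysql(config_mysql, sql, params):
        """Sketch of the uploads_mysql helper: run one parameterized INSERT
        and commit (assumes config_mysql maps to pymysql.connect kwargs)."""
        conn = pymysql.connect(**config_mysql)
        try:
            with conn.cursor() as cursor:
                cursor.execute(sql, params)
            conn.commit()
        finally:
            conn.close()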
Example #3
 def run(self):
     count = 0
     while True:
         # ADD_COLLECTION: accounts for supplementary collection; get_account: routine collection. account_list handles both a single account and a list of accounts
         account_list = ADD_COLLECTION if ADD_COLLECTION else self.get_account()
         # length = len(threading.enumerate())  # 枚举返回个列表
         # log.info('当前运行的线程数为:{}'.format(threading.active_count()))
         count += 1
         log.info('第{}次'.format(count))
         if account_list is None:
             log.info('调度队列为空,休眠5秒')
             time.sleep(5)
             continue
         for account_name in account_list:
             try:
                 self.search_name = account_name
                 html_account = self.account_homepage()
                 if html_account:
                     html = html_account
                 else:
                     log.info('{}|找不到微信号'.format(account_name))
                     continue
                 urls_article = self.urls_article(html)
                 # Determine the account info
                 account = Account()
                 account.name = self.name
                 account.account = account_name
                 account.tags = self.get_tags()
                 account.get_account_id()
                 # De-duplication check
                 ids = self.dedup(account_name) if JUDEG else ''
                 entity = None
                 backpack_list = []
                 ftp_list = []
                 ftp_info = None
                 for page_count, url in enumerate(urls_article):
                     # if page_count < 15:
                     #     continue
                     article = Article()
                     try:
                         article.create(url, account)
                     except RuntimeError as run_error:
                         log.info('微信验证码错误 {}'.format(run_error))
                     log.info('第{}条 文章标题: {}'.format(
                         page_count, article.title))
                     log.info("当前文章url: {}".format(url))
                     entity = JsonEntity(article, account)
                     log.info('当前文章ID: {}'.format(entity.id))
                     if entity.id in ids and JUDEG is True:
                         log.info('当前文章已存在,跳过')
                         # if page_count >= 20:
                         #     log.info('超过20篇文章,跳出')
                         #     break
                         continue
                     backpack = Backpack()
                     backpack.create(entity)
                     backpack_list.append(backpack.create_backpack())
                     # self.save_to_mysql(entity)
                     # self.save_to_mongo(entity.to_dict())
                     # Build the FTP package
                     ftp_info = Ftp(entity)
                     name_xml = ftp_info.hash_md5(ftp_info.url)
                     log.info('当前文章xml: {}'.format(name_xml))
                     self.create_xml(ftp_info.ftp_dict(), name_xml)
                     ftp_list.append(name_xml)
                     # break
                 log.info("开始发包")
                 # TODO: sending times out; adjust the MTU
                 if ftp_info is not None:
                     entity.uploads_ftp(ftp_info, ftp_list)
                     log.info("ftp发包完成")
                 if entity and backpack_list:
                     # entity.uploads(backpack_list)
                     entity.uploads_datacenter_relay(backpack_list)
                     entity.uploads_datacenter_unity(backpack_list)
                     log.info("数据中心,三合一,发包完成")
             except Exception as e:
                 log.exception("解析公众号错误 {}".format(e))
                 if 'chrome not reachable' in str(e):
                     raise RuntimeError('chrome not reachable')
         if ADD_COLLECTION:
             break
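Examples #1, #3, #4 and #5 all name the generated XML file after ftp_info.hash_md5(ftp_info.url), and Example #4 uses the same hash for the dedup key. Assuming it is a plain MD5 hex digest of the string (the project's own hash_md5 may normalise its input first), it reduces to:

    import hashlib

    def hash_md5(text):
        """MD5 hex digest of a string, used e.g. to derive the XML file name
        from the article URL (a sketch of the project's hash_md5)."""
        return hashlib.md5(text.encode('utf-8')).hexdigest()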
Example #4
    def run(self):
        count = 0
        while True:
            # ADD_COLLECTION: accounts for supplementary collection; get_account: routine collection. account_list handles both a single account and a list of accounts
            account_list = ADD_COLLECTION if ADD_COLLECTION else self.get_account()
            # length = len(threading.enumerate())  # 枚举返回个列表
            log.info('当前运行的线程数为:{}'.format(threading.active_count()))
            log.info('当前运行的进程:{}'.format(
                multiprocessing.current_process().name))
            count += 1
            log.info('第{}次'.format(count))
            if account_list is None:
                log.info('调度队列为空,休眠5秒')
                time.sleep(5)
                continue
            for account_name in account_list:
                try:
                    self.search_name = account_name
                    html_account = self.account_homepage()
                    if html_account:
                        html = html_account
                    else:
                        log.info('{}|找不到微信号'.format(account_name))
                        continue
                    urls_article = self.urls_article(html)
                    # Determine the account info
                    account = Account()
                    account.name = self.name
                    account.account = account_name
                    account.tags = self.get_tags()
                    account.get_account_id()
                    # De-dup check against the backend store
                    # ids = self.dedup(account_name) if JUDEG else ''
                    # De-dup check via Redis
                    sentenced_keys = account.account + ' ' + str(
                        account.account_id)
                    keys = hash_md5(sentenced_keys)
                    log.info('keys: {}'.format(keys))
                    dedup_result = self.dedup_redis(keys)
                    post_dedup_urls = []

                    entity = None
                    backpack_list = []
                    ftp_list = []
                    ftp_info = None
                    for page_count, url in enumerate(urls_article):
                        try:
                            # if page_count > 5:
                            #     break
                            article = Article()
                            article.create(url, account, self.proxies)
                            log.info('第{}条 文章标题: {}'.format(
                                page_count, article.title))
                            log.info("当前文章url: {}".format(url))
                            entity = JsonEntity(article, account)
                            log.info('当前文章ID: {}'.format(entity.id))
                            article_date = datetime.datetime.fromtimestamp(
                                int(str(article.time)[:-3]))
                            day_diff = datetime.date.today() - article_date.date()
                            if day_diff.days > 15:
                                log.info(
                                    '超过采集interval最大15天 的文章不采集,已采集{}条文章'.format(
                                        page_count))
                                self.count_articles(page_count)
                                break
                            if dedup_result:
                                # title_time_str = entity.title + str(entity.time)
                                # title_time_md5 = hash_md5(title_time_str)
                                if entity.id in dedup_result:
                                    log.info('当前文章已存在,跳过')
                                    continue
                                else:
                                    post_dedup_urls.append(entity.id)
                            else:
                                # title_time_str = entity.title + str(entity.time)
                                # title_time_md5 = hash_md5(title_time_str)
                                post_dedup_urls.append(entity.id)

                            # dedup_result = self.dedup_redis(entity)
                            # if dedup_result:
                            #     log.info('当前文章已存在,跳过')
                            # ids = ids.append({'key': entity.id, 'urls': entity.url})
                            # if entity.id in ids and JUDEG is True:
                            #     log.info('当前文章已存在,跳过')
                            #     continue
                            backpack = Backpack()
                            backpack.create(entity)
                            backpack_list.append(backpack.create_backpack())
                            # self.save_to_mysql(entity)
                            # self.save_to_mongo(entity.to_dict())
                            # Build the FTP package
                            ftp_info = Ftp(entity)
                            name_xml = ftp_info.hash_md5(ftp_info.url)
                            log.info('当前文章xml: {}'.format(name_xml))
                            self.create_xml(ftp_info.ftp_dict(), name_xml)
                            ftp_list.append(name_xml)
                        except Exception as run_error:
                            log.info('微信解析文章错误 {}'.format(run_error))
                            continue

                    log.info("开始发包")
                    if entity and backpack_list:
                        # Send directly to the backend
                        # entity.uploads(backpack_list)
                        entity.uploads_datacenter_relay(backpack_list)
                        entity.uploads_datacenter_unity(backpack_list)
                        log.info("数据中心,三合一,发包完成")
                    else:
                        log.info('包列表为空,不发送数据')
                        continue
                    # TODO: sending times out; adjust the MTU
                    if ftp_info is not None:
                        entity.uploads_ftp(ftp_info, ftp_list)
                        log.info("ftp发包完成")
                    if post_dedup_urls:
                        log.info('上传判重中心key:{} urls:{}'.format(
                            keys, post_dedup_urls))
                        url = 'http://47.100.53.87:8008/Schedule/CacheWx'
                        data = [{
                            "key": keys,
                            "sourceNodes": "1",
                            "sourceType": "2",
                            "urls": post_dedup_urls
                        }]
                        r = requests.post(url,
                                          data=json.dumps(data),
                                          timeout=self.timeout)
                        log.info('上传判重中心结果{}'.format(r.status_code))
                except Exception as e:
                    log.exception("解析公众号错误 {}".format(e))
                    time.sleep(30)
                    if ('chrome not reachable'
                            in str(e)) or ('Message: timeout' in str(e)):
                        raise RuntimeError('chrome not reachable')
            if ADD_COLLECTION:
                break
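Example #4 drops articles older than 15 days by trimming the millisecond timestamp in article.time, converting it to a date and comparing against today. The same cutoff, packaged as a small standalone check (the helper name is illustrative, not from the project):

    import datetime

    def is_too_old(article_time_ms, max_days=15):
        """True when the article's millisecond timestamp is more than
        max_days old, mirroring the cutoff logic in Example #4."""
        article_date = datetime.datetime.fromtimestamp(int(str(article_time_ms)[:-3]))
        day_diff = datetime.date.today() - article_date.date()
        return day_diff.days > max_days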
Example #5
 def run(self):
     count = 0
     while True:
         count += 1
         log.info('第{}次'.format(count))
         account_list = ADD_COLLECTION if ADD_COLLECTION else self.account_list()
         # if account_list:
         #     continue
         # for account_name in account_list:
         try:
             for account_name in account_list:
                 log.info('第{}次'.format(count))
                 self.search_name = account_name
                 html_account = self.account_homepage()
                 if html_account:
                     html = html_account
                 else:
                     log.info('找不到微信号首页: {}'.format(account_name))
                     continue
                 urls_article = self.urls_article(html)
                 # Determine the account info
                 account = Account()
                 account.name = self.name
                 account.account = account_name
                 account.get_account_id()
                 # De-duplication check
                 ids = self.dedup(account_name)
                 entity = None
                 backpack_list = []
                 ftp_list = []
                 ftp_info = None
                 for page_count, url in enumerate(urls_article):
                     # if page_count < 15:
                     #     continue
                     article = Article()
                     try:
                         article.create(url, account)
                     except RuntimeError as run_error:
                         log.info('找不到浏览器 {}'.format(run_error))
                     log.info('第{}条 文章标题: {}'.format(
                         page_count, article.title))
                     log.info("当前文章url: {}".format(url))
                     entity = JsonEntity(article, account)
                     log.info('当前文章ID: {}'.format(entity.id))
                     if entity.id in ids:
                         log.info('当前文章已存在,跳过')
                         continue
                     backpack = Backpack()
                     backpack.create(entity)
                     backpack_list.append(backpack.create_backpack())
                     # self.save_to_mysql(entity)
                     self.save_to_mongo(entity.to_dict())
                     # Build the FTP package
                     ftp_info = Ftp(entity)
                     name_xml = ftp_info.hash_md5(ftp_info.url)
                     log.info('当前文章xml: {}'.format(name_xml))
                     self.create_xml(ftp_info.ftp_dict(), name_xml)
                     ftp_list.append(name_xml)
                     # if page_count >= 3:
                     #     break
                 log.info("发包")
                 # TODO: sending times out; adjust the MTU
                 if ftp_info is not None:
                     entity.uploads_ftp(ftp_info, ftp_list)
                 if entity:
                     # entity.uploads(backpack_list)
                     entity.uploads_datacenter_relay(backpack_list)
                     entity.uploads_datacenter_unity(backpack_list)
                 log.info("发包完成")
         except Exception as e:
             log.exception("解析公众号错误 {}".format(e))
             if 'chrome not reachable' in str(e):
                 raise RuntimeError('chrome not reachable')
             continue
Example #6
 def run(self):
     count = 0
     while True:
         count += 1
         log.info('第{}次'.format(count))
         # ADD_COLLECTION: accounts for supplementary collection; get_account: routine collection. account_list handles both a single account and a list of accounts
         account_list = ADD_COLLECTION if ADD_COLLECTION else self.get_account()
         if account_list is None:
             log.info('调度队列为空,休眠5秒')
             time.sleep(5)
             continue
         for account_name in account_list:
             try:
                 self.search_name = account_name
                 html_account = self.account_homepage()
                 if html_account:
                     html = html_account
                 else:
                     log.info('{}|找不到微信号'.format(account_name))
                     continue
                 urls_article = self.urls_article(html)
                 # Determine the account info
                 account = Account()
                 account.name = self.name
                 account.account = account_name
                 account.tags = self.get_tags()
                 account.get_account_id()
                 if not account.account_id:
                     log.info('{} 没有account_id'.format(self.name))
                     break
                 # De-duplication check
                 ids = self.dedup(account_name) if JUDEG else ''
                 entity = None
                 backpack_list = []
                 ftp_list = []
                 ftp_info = None
                 for page_count, url in enumerate(urls_article):
                     # if page_count < 15:
                     #     continue
                     article = Article()
                     try:
                         article.create(url, account)
                     except RuntimeError as run_error:
                         log.info('找不到浏览器 {}'.format(run_error))
                     log.info('第{}条 文章标题: {}'.format(page_count, article.title))
                     log.info("当前文章url: {}".format(url))
                     entity = JsonEntity(article, account)
                     log.info('当前文章ID: {}'.format(entity.id))
                     if entity.id in ids and JUDEG is True:
                         log.info('当前文章已存在,跳过0')
                         # continue
                     backpack = Backpack()
                     backpack.create(entity)
                     backpack_list.append(backpack.create_backpack())
                     # self.save_to_mysql(entity)
                     # self.save_to_mongo(entity.to_dict())
                     # if page_count >= 3:
                     #     break
                 log.info("开始发包")
                 if entity and backpack_list:
                     entity.uploads(backpack_list)
                     log.info("发包完成")
             except Exception as e:
                 log.exception("解析公众号错误 {}".format(e))
                 if 'chrome not reachable' in str(e):
                     raise RuntimeError('chrome not reachable')
Example #7
    def run(self):
        html_account = self.account_homepage()
        if html_account:
            html, account_of_homepage = html_account
        else:
            self.send_result()
            return
        log('start 公众号: ', self.name)
        urls_article = self.urls_article(html)

        account = Account()
        account.name = self.name
        account.account = account_of_homepage
        account.get_account_id()

        articles = []
        backpack_list = []
        positive_article = 0
        negative_article = 0
        for page_count, url in enumerate(urls_article):
            # if page_count > 2:
            #     break
            article = Article()
            log('url:', url)
            article.create(url, self.name)
            log('第{}条, 文章标题: {}'.format(page_count, article.title))

            # Ignore articles more than 7 days old
            if article.time:
                article_date = datetime.datetime.fromtimestamp(
                    int(article.time[:-3]))
                day_diff = datetime.datetime.now().date() - article_date.date()
                if day_diff.days > 6:
                    break
            # Tally positive vs. negative articles
            count_positive, count_negative = self.emotion_judge(
                article.content)
            if count_positive > count_negative:
                positive_article += 1
            else:
                negative_article += 1
            entity = JsonEntity(article, account)
            backpack = Backpack()
            backpack.create(entity)
            backpack_list.append(backpack.create_backpack())
            # Collect all articles
            article_info = backpack.to_dict()
            articles.append(article_info)
        log('所有文章抓取完毕')
        content_all_list = ''
        for article in articles:
            content_all_list += article.get('Content')
        log('文章长度', len(content_all_list))
        # Word segmentation
        key_words_list = []
        GETNER_API_URL = 'http://221.204.232.7:40015/NER/GetNer'
        data = {
            "texts": [content_all_list],
        }
        log('请求分词')
        response = requests.post(url=GETNER_API_URL, data=data, timeout=180)
        ner_result = response.json().get('rst')[0]
        if ner_result.get('status') == 'success':
            org_dic = ner_result.get('ner').get('ORG')
            loc_dic = ner_result.get('ner').get('LOC')
            per_dic = ner_result.get('ner').get('PER')
            if org_dic:
                for i in org_dic.items():
                    key_words_list.append(i)
            if loc_dic:
                for i in loc_dic.items():
                    key_words_list.append(i)
            if per_dic:
                for i in per_dic.items():
                    key_words_list.append(i)

        # Return the 20 most frequent words
        key_words = dict()
        key_words['list'] = []
        key_words_list = sorted(key_words_list,
                                key=lambda x: x[1],
                                reverse=True)[:20]
        for k in key_words_list:
            key_words['list'].append({"times": k[1], "keyword": k[0]})
        log('分词完成')
        # Process the articles
        result = handle(articles)
        result['KeyWord'] = key_words
        result['ArtPosNeg'] = {
            'Indicate': {
                'Positive': positive_article,
                'Negative': negative_article
            }
        }
        result['Success'] = True
        result['Account'] = self.name
        result['Message'] = ''
        db['newMedia'].update({'Account': self.name},
                              {'$set': {
                                  'data': result
                              }})
        log('{} 抓取完成'.format(self.name))
        # Notify the front end of success
        self.status = 3
        self.send_result()
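Example #7 merges the ORG/LOC/PER frequency dicts returned by the NER service and keeps the 20 most frequent terms. The same merge-and-rank step can be written with collections.Counter; the input shape ({word: count} per entity type) is inferred from the snippet.

    from collections import Counter

    def top_keywords(ner, n=20):
        """Merge the ORG/LOC/PER {word: count} dicts and return the n most
        frequent terms as {'list': [{'times': count, 'keyword': word}, ...]},
        matching the key_words structure built in Example #7."""
        merged = Counter()
        for entity_type in ('ORG', 'LOC', 'PER'):
            merged.update(ner.get(entity_type) or {})
        return {'list': [{'times': c, 'keyword': w} for w, c in merged.most_common(n)]}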
Example #8
    def run(self):
        html_account = self.account_homepage()
        if html_account:
            html, account_of_homepage = html_account
        else:
            # self.send_result()
            return
        log('start 公众号: ', self.name)
        urls_article = self.urls_article(html)

        account = Account()
        account.name = self.name
        account.account = account_of_homepage
        account.get_account_id()

        articles = []
        backpack_list = []
        positive_article = 0
        negative_article = 0
        for page_count, url in enumerate(urls_article):
            # if page_count > 2:
            #     break
            article = Article()
            log('url:', url)
            article.create(url, self.name)
            log('文章标题:', article.title)
            log("第{}条".format(page_count))

            # Ignore articles more than 7 days old
            if article.time:
                article_date = datetime.datetime.fromtimestamp(
                    int(article.time[:-3]))
                day_diff = datetime.datetime.now().date() - article_date.date()
                if day_diff.days > 6:
                    break
            # Tally positive vs. negative articles
            count_positive, count_negative = self.emotion_judge(
                article.content)
            if count_positive > count_negative:
                positive_article += 1
            else:
                negative_article += 1
            entity = JsonEntity(article, account)
            backpack = Backpack()
            backpack.create(entity)
            backpack_list.append(backpack.create_backpack())
            # Collect all articles
            article_info = backpack.to_dict()
            articles.append(article_info)
        log('所有文章抓取完毕')
        content_all_list = ''
        for article in articles:
            content_all_list += article.get('Content')
        # Word segmentation
        key_words_list = []
        thu1 = thulac.thulac()
        seg_list = thu1.cut(content_all_list, text=False)
        for s in seg_list:
            if (len(s[0]) >= 2 and re.search('[\u4e00-\u9fff]+', s[0])
                    and s[1] in ['n', 'np', 'ns', 'ni', 'nz']):
                key_words_list.append(s[0])

        # Return the 20 most frequent words
        key_words_counter = Counter(key_words_list).most_common(20)
        key_word = dict()
        key_word['list'] = []
        for k in key_words_counter:
            key_word['list'].append({"times": k[1], "keyword": k[0]})
        # Process the articles
        result = handle(articles)
        result['KeyWord'] = key_word
        result['ArtPosNeg'] = {
            'Indicate': {
                'Positive': positive_article,
                'Negative': negative_article
            }
        }
        result['Success'] = True
        result['Account'] = self.name
        result['Message'] = ''
        db['newMedia'].update({'Account': self.name},
                              {'$set': {
                                  'data': result
                              }})
        log('{} 抓取完成'.format(self.name))
        # Notify the front end of success
        self.status = 3
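Examples #7 and #8 both call self.emotion_judge(article.content), which returns a positive count and a negative count but is not shown. A purely hypothetical word-list sketch that matches that return contract; the word lists are placeholders, not the project's dictionaries.

    def emotion_judge(content, positive_words=('利好', '上涨'), negative_words=('亏损', '下跌')):
        """Hypothetical stand-in for emotion_judge: count positive and negative
        word hits in the article text and return both tallies."""
        count_positive = sum(content.count(w) for w in positive_words)
        count_negative = sum(content.count(w) for w in negative_words)
        return count_positive, count_negative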
Example #9
    def run(self):
        self.set_key_uin()
        while True:
            _biz_list = self.biz_list()
            if _biz_list:
                entity = None
                for biz in _biz_list:
                    try:
                        self._biz = biz
                        self.create_url()
                        print('添加成功')
                        # self.url = 'https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MzA4OTEwNDUwMA==&uin=MTE1NjkxODg2MQ==&key=2e15abc1cc63c6472b3f9e24b445b1c19bb7dcee55cf4eb76c5363872c0f2f760899356828e84a3aeeb272cb0565257c52ac612c186648dbb4226484e2f04530a140a103860689fe7656df0d53f08ab5'
                        resp = requests.get(self.url, headers=self.headers)
                        # An empty response means the key has expired
                        if len(resp.text) == 0:
                            log('key失效,获取新的key', self.url)
                            self.set_key_uin()
                            resp = requests.get(self.url, headers=self.headers)
                        else:
                            log('key有效,当前链接', self.url)
                        urls = self.urls_article(resp)

                        # Build the account
                        article = Article()
                        article.create(urls[0])
                        log("文章标题 {}".format(article.title))
                        # article.title = article.title.replace('【', '')
                        # article.title = article.title.replace('】', '')
                        # article.title = article.title.replace('!', '')

                        log(article.title)
                        account = Account()
                        account.name = article.author
                        # account.name = '中央纪委国家监委网站'
                        account.account = article.account
                        # account.account = 'gh_a78ef1e3d11e'
                        account.get_account_id()
                        account.account_id = 126774166
                        if not account.account:
                            log("错误,找不到account")

                        backpack_list = []
                        article_count = 0
                        for article_count, url in enumerate(urls):
                            log('文章链接', url)
                            article = Article()
                            article.create(url)
                            article.title = article.title.replace('.', '')
                            if '!' in article.title:
                                article.title = article.title.replace('!', '')
                            log("文章标题 {}".format(article.title))
                            entity = JsonEntity(article, account)
                            backpack = Backpack()

                            # Shared articles: the regex cannot match a publish time, which raises an exception
                            try:
                                backpack.create(entity)
                            except Exception as e:
                                log('share error', e)
                                continue
                            backpack_list.append(backpack.create_backpack())

                            # Upload to the database
                            sql = '''
                            INSERT INTO
                                account_http(article_url, addon, account, account_id, author, id, title)
                            VALUES
                                (%s, %s, %s, %s, %s, %s, %s)
                                    '''
                            _tuple = (entity.url, datetime.datetime.now(),
                                      entity.account, entity.account_id,
                                      entity.author, entity.id, entity.title)
                            uploads_mysql(config_mysql, sql, _tuple)
                            # if article_count == 4:
                            #     break
                        log('采集账号:{} 所有文章完毕,共{}条文章'.format(
                            self.name, article_count + 1))

                        log("发包")
                        if entity:
                            entity.uploads(backpack_list)
                            log("uploads successful")
                        print("end")
                        # Process one account per iteration
                        break
                    except Exception as e:
                        log('account error', e)
                        continue
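Example #9 relies on self.create_url() to assemble the mp.weixin.qq.com profile_ext home URL from __biz, uin and key (see the commented-out example URL above). A sketch of that construction with urllib.parse; the exact parameter set is an assumption taken from that comment.

    from urllib.parse import urlencode

    def build_profile_url(biz, uin, key):
        """Sketch of create_url(): build the profile_ext 'home' URL from
        __biz, uin and key, as in the commented example URL in Example #9."""
        params = {'action': 'home', '__biz': biz, 'uin': uin, 'key': key}
        return 'https://mp.weixin.qq.com/mp/profile_ext?' + urlencode(params)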
Example #10
def weixin():
    if request.method == 'GET':
        try:
            data = request.args
            signature = data.get('signature')
            print(signature)
            timestamp = data.get('timestamp')
            print(timestamp)
            nonce = data.get('nonce')
            print(nonce)
            echostr = data.get('echostr')
            print(echostr)
            token = 'jsxnh'
            # Sort token, timestamp and nonce, SHA-1 them, and compare with the signature
            params = [token, timestamp, nonce]
            params.sort()
            sha1 = hashlib.sha1()
            for param in params:
                sha1.update(param.encode('utf-8'))
            hashcode = sha1.hexdigest()
            if hashcode == signature:
                return echostr
            else:
                return ""
        except Exception as e:
            return str(e)
    else:
        rec = request.stream.read()
        xml_recv = ET.fromstring(rec)
        ToUserName = xml_recv.find("ToUserName").text
        FromUserName = xml_recv.find("FromUserName").text
        Content = xml_recv.find("Content").text
        re = "<xml><ToUserName><![CDATA[%s]]></ToUserName>"\
             "<FromUserName><![CDATA[%s]]></FromUserName>"\
             "<CreateTime>%s</CreateTime>"\
             "<MsgType><![CDATA[text]]></MsgType>"\
             "<Content><![CDATA[%s]]></Content></xml>"
        if Content == u'故事':
            reply = "<xml><ToUserName><![CDATA[%s]]></ToUserName><FromUserName><![CDATA[%s]]>" \
                    "</FromUserName><CreateTime>%s" \
                    "</CreateTime><MsgType><![CDATA[image]]></MsgType><Image>"\
                    "<MediaId><![CDATA[%s]]></MediaId>"\
                    "</Image></xml>"
            recv_content = randomstory()
            convert_to_picture(recv_content)
            msgid = get_media_ID('1.png')
            response = make_response(reply % (FromUserName, ToUserName, str(int(time.time())), msgid))
            response.content_type = 'application/xml'
            return response
        elif Content == u'allcount':
            accounts = Account.findAll()
            recv_content = ''
            for account in accounts:
                recv_content += account.getValue('id')+' '
                recv_content += account.getValue('app')+' '
                recv_content += account.getValue('account')+' '
                recv_content += account.getValue('password')+' '
                recv_content += account.getValue('message')+' '
            response = make_response(text_reply % (FromUserName, ToUserName, str(int(time.time())), recv_content))
            response.content_type = 'application/xml'
            return response
        elif len(Content.split(' ')) >= 2:
            keywords = Content.split(' ')
            keyword = keywords[0]
            response = make_response(text_reply % (FromUserName, ToUserName, str(int(time.time())), "ok"))
            response.content_type = 'application/xml'
            if keyword == u'add':
                acc = Account(id=keywords[1], app=keywords[2], account=keywords[3], password=keywords[4], message=keywords[5])
                acc.save()
                return response
            elif keyword == u'remove':
                acc = Account.find(keywords[1])
                acc.remove()
                return response
            elif keyword == u'update':
                acc = Account.find(keywords[1])
                acc.app = keywords[2]
                acc.account = keywords[3]
                acc.password = keywords[4]
                acc.message = keywords[5]
                acc.update()
                return response
            elif keyword == u'find':
                acc = Account.find(keywords[1])
                accstr = acc.account+"\n"+acc.password
                response_ = make_response(text_reply % (FromUserName, ToUserName, str(int(time.time())), accstr))
                response_.content_type = 'application/xml'
                return response_
            else:
                return "success"
        else:
            return 'success'
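Example #10 shows the weixin() view on its own, without the Flask app or route registration it needs to run. A minimal wiring sketch; the route path, host and port are assumptions, not taken from the snippet.

    from flask import Flask, request, make_response

    app = Flask(__name__)

    # Hypothetical registration; the real route path is not shown in the snippet.
    app.add_url_rule('/weixin', view_func=weixin, methods=['GET', 'POST'])

    if __name__ == '__main__':
        app.run(host='0.0.0.0', port=80)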