def getFeed(self, request, queryset, *arg1, **arg2):
    """Admin action: download every selected feed and parse its new entries.

    Feeds retrieved within the last 1200 minutes are skipped. On a download
    failure the feed is logged and the batch CONTINUES with the next feed
    (the original used `break`, which silently aborted every remaining feed
    even though the log message claims the fetch restarts).
    """
    logging.info(u'开始采集Feed')
    # Anything retrieved after this deadline is considered fresh enough.
    feed_retrieval_deadline = datetime.now() - timedelta(minutes=1200)
    for feed in queryset:
        if feed.last_retrieved > feed_retrieval_deadline:
            logging.info('Skipping feed %s.', feed.feedurl)
            continue
        logging.info('Getting feed %s.', feed.feedurl)
        try:
            # 30-second timeout on the HTTP fetch.
            result = getpage(feed.feedurl, 30)
        except Exception:
            # Lazy %-args instead of eager string formatting.
            logging.warning('Could not get feed %s ,and the fetch is restart now',
                            feed.feedurl)
            feed.last_retrieved = datetime.now()
            # NOTE(review): last_retrieved is deliberately not persisted here
            # (save was commented out in the original), so a failed feed is
            # retried on the next run.
            #feed.save()
            continue  # was `break`: one bad feed must not abort the batch
        if result.code == 200:
            # Hand the raw document plus the feed's parse configuration to
            # the private parser, then record the successful retrieval.
            self.__parse_feed(result.read(), feed.feedurl, feed.stop_target,
                              feed.category, feed.latest, feed.start_target,
                              feed.mid_target, feed.end_target,
                              feed.allow_target)
            feed.last_retrieved = datetime.now()
            feed.save()
        elif result.code == 500:
            logging.error('Feed %s returned with status code 500.', feed.feedurl)
        elif result.code == 404:
            logging.error('Error 404: Nothing found at %s.', feed.feedurl)
def getFeed(self, request, queryset, *arg1, **arg2):
    """Admin action: download every selected feed and parse its new entries.

    Feeds retrieved within the last 1200 minutes are skipped.
    """
    logging.info(u'开始采集Feed')
    # Anything retrieved after this deadline is considered fresh enough.
    feed_retrieval_deadline = datetime.now() - timedelta(minutes=1200)
    for feed in queryset:
        if feed.last_retrieved > feed_retrieval_deadline:
            logging.info('Skipping feed %s.', feed.feedurl)
            continue
        logging.info('Getting feed %s.', feed.feedurl)
        try:
            # 30-second timeout on the HTTP fetch.
            result = getpage(feed.feedurl, 30)
        except Exception:
            logging.warning(
                'Could not get feed %s ,and the fetch is restart now' % feed.feedurl)
            feed.last_retrieved = datetime.now()
            #feed.save()
            # NOTE(review): `break` aborts every remaining feed in the batch,
            # and last_retrieved is never persisted (save is commented out) —
            # confirm whether `continue` + save was the intent.
            break
        if result.code == 200:
            # Delegate entry extraction/storage to the private parser, then
            # record the successful retrieval time.
            self.__parse_feed(result.read(), feed.feedurl, feed.stop_target,
                              feed.category, feed.latest, feed.start_target,
                              feed.mid_target, feed.end_target,
                              feed.allow_target)
            feed.last_retrieved = datetime.now()
            feed.save()
        elif result.code == 500:
            logging.error('Feed %s returned with status code 500.' % feed.feedurl)
        elif result.code == 404:
            logging.error('Error 404: Nothing found at %s.' % feed.feedurl)
def __parse_feed(self, feed_content, feed_url, stop_target, category,
                 feed_latest, start_target, mid_target, end_target,
                 allow_target):
    """Parse a fetched feed document and store each entry not already known.

    For every entry whose title fails `__feedslist_check`, extract url,
    content, author and publication date, clean the HTML, and hand the
    result to `__store_article`. Errors per entry are logged and do not
    stop the remaining entries.
    """
    feed = feedparser.parse(feed_content)
    i = 0       # number of entries attempted
    dead_i = 0  # NOTE(review): never incremented in this block — confirm intent
    for entry in feed.entries:
        logging.info('start parse feed,the dead_i is %s', dead_i)
        title = htmllib.decoding(entry.title)
        categorie_keys = []
        content = ''
        date_published = datetime.now()
        author_name = ''
        Mystat = True
        # Only store entries whose title is not already on record.
        if self.__feedslist_check(title) == False:
            try:
                i += 1
                url = ''
                logging.info('beging to add new article No. %s', i)
                # Prefer the original article link over the FeedBurner proxy.
                # (`in` replaces the deprecated dict.has_key.)
                if 'feedburner_origlink' in entry:
                    url = entry.feedburner_origlink
                else:
                    url = entry.link
                if 'content' in entry:
                    content = entry.content[0].value
                else:
                    content = entry.description
                if 'author' in entry:
                    author_name = entry.author
                else:
                    author_name = "转载"
                # Strip HTML tags from the title before storing it.
                stripper = HTMLStripper()
                stripper.feed(title)
                title = stripper.get_data()
                content = htmllib.decoding(content)
                content = htmllib.GetFeedclean(url, content, stop_target)
                if 'updated_parsed' in entry:
                    date_published = datetime(*entry.updated_parsed[:6])
                else:
                    date_published = datetime.now()
            except Exception as data:
                logging.warn('this like something happened,the error is %s', data)
            try:
                feedresult = self.__store_article(title, url, category, content,
                                                  date_published, author_name,
                                                  feed_url, feed)
                if feedresult == True:
                    logging.info('The No.%s is fetched to the db', i)
                else:
                    logging.error('The No.%s is fetched Fail', i)
                    Mystat = False
            except Exception as data:
                logging.warning('the error is %s', data)
                Mystat = False
def __saveImages(self, name, image):
    """Write image bytes to MEDIA_ROOT/cache/<name>.

    Returns the public URL ("<MEDIA_URL>cache/<name>") on success, or
    False on any error (which is also logged).
    """
    try:
        path = os.path.join(settings.MEDIA_ROOT, 'cache/%s' % name)
        # `with open(...)` closes the handle even if write() raises; the
        # original used the removed `file()` builtin with no close-on-error.
        with open(path, "wb") as f:
            f.write(image)
        return "%scache/%s" % (settings.MEDIA_URL, name)
    except Exception as e:
        logging.error(e)
        return False
def __store_images(self, content, name, model):
    """Save image bytes to disk and record the new URL on `model`.

    On success sets model.stat = 1 and returns True; on failure sets
    model.stat = 2, logs the error and returns False (the original fell
    through and returned None, so callers could not distinguish failure
    from a missing return).
    """
    try:
        model.newurl = self.__saveImages(name, content)
        model.stat = 1
        model.save()
        return True
    except Exception as data:
        model.stat = 2
        logging.error('the db saved error is: %s', data)
        return False
def __parse_feed(self, feed_content, feed_url, stop_target, category,
                 feed_latest, start_target, mid_target, end_target,
                 allow_target):
    """Parse a fetched feed document and store each entry not already known.

    NOTE(review): Python 2 only (`except Exception, data`, dict.has_key) —
    port before running under Python 3.
    """
    feed = feedparser.parse(feed_content)
    i = 0       # number of entries attempted
    dead_i = 0  # NOTE(review): never incremented in this block — confirm intent
    for entry in feed.entries:
        logging.info('start parse feed,the dead_i is %s', dead_i)
        title = htmllib.decoding(entry.title)
        categorie_keys = []
        content = ''
        date_published = datetime.now()
        author_name = ''
        Mystat = True
        # Only store entries whose title is not already on record.
        if self.__feedslist_check(title) == False:
            try:
                i += 1
                url = ''
                logging.info('beging to add new article No. %s', i)
                # Prefer the original article link over the FeedBurner proxy.
                if(entry.has_key('feedburner_origlink')):
                    url = entry.feedburner_origlink
                else:
                    url = entry.link
                if entry.has_key('content'):
                    content = entry.content[0].value
                else:
                    content = entry.description
                if entry.has_key('author'):
                    author_name = entry.author
                else:
                    author_name = "转载"
                # Strip HTML tags from the title before storing it.
                stripper = HTMLStripper()
                stripper.feed(title)
                title = stripper.get_data()
                content = htmllib.decoding(content)
                content = htmllib.GetFeedclean(url, content, stop_target)
                if(entry.has_key('updated_parsed')):
                    date_published = datetime(*entry.updated_parsed[:6])
                else:
                    date_published = datetime.now()
            except Exception, data:
                logging.warn('this like something happened,the error is %s', data)
            try:
                feedresult = self.__store_article(title, url, category, content,
                                                  date_published, author_name,
                                                  feed_url, feed)
                if feedresult == True:
                    logging.info('The No.%s is fetched to the db', i)
                else:
                    logging.error('The No.%s is fetched Fail', i)
                    Mystat = False
            except Exception, data:
                logging.warning('the error is %s', data)
                Mystat = False
def __store_entry(self, feed):
    """Promote a fetched feed row into a published Entry record.

    Creates (or reuses) an Entry keyed by title, copies the feed's fields
    onto it, rewrites embedded images via __Parse_image, and marks the feed
    row fetch_stat = 4 on success or 3 on failure.
    """
    try:
        # get_or_create returns (object, created) — the flag was previously
        # bound to a misleading name `result` and is unused either way.
        entry, created = Entry.published.get_or_create(title=feed.title)
        entry.excerpt = feed.excerpt
        entry.status = 2  # presumably 2 == "published" — confirm against Entry model
        entry.author_name = feed.author_name
        entry.date = feed.date
        entry.slug = htmllib.sid()
        entry.content = self.__Parse_image(feed.content)
        entry.categories.add(feed.feed.category)
        entry.save()
        feed.fetch_stat = 4
        feed.save()
    except Exception as data:
        logging.error('the db saved error is: %s', data)
        feed.fetch_stat = 3
        feed.save()
def module_run(request):
    """Build the "@@"-separated command string from the query string and
    open an authenticated rpyc connection per settings.RPYC_SET.

    Query parameters (all optional, default ""): ModuleID, hosts,
    sys_param_1, sys_param_2. On connection failure returns an
    HttpResponse describing the error.
    """
    import rpyc  # remote-communication module, imported lazily
    # NOTE(review): `loads` is unused in the visible body — possibly used
    # further down in the original file; kept to avoid breaking that code.
    from cPickle import loads

    # QueryDict.get(key, '') replaces the four repetitive
    # `if not key in request.GET` blocks with identical results.
    put_string = ""
    Module_Id = request.GET.get('ModuleID', '')   # selected module id
    put_string += Module_Id + "@@"
    Hosts = request.GET.get('hosts', '')          # selected target hosts
    put_string += Hosts + "@@"
    Sys_param_1 = request.GET.get('sys_param_1', '')  # optional module arg 1
    put_string += Sys_param_1 + "@@"
    Sys_param_2 = request.GET.get('sys_param_2', '')  # optional module arg 2
    put_string += Sys_param_2 + "@@"

    # Connect to the rpyc server configured in settings.
    try:
        # Was a bare Py2 `print settings.RPYC_SET` — debug output belongs in logging.
        logging.debug('rpyc settings: %s', settings.RPYC_SET)
        conn = rpyc.connect(settings.RPYC_SET['HOST'], settings.RPYC_SET['PORT'])
        conn.root.login(settings.RPYC_SET['USER'], settings.RPYC_SET['KEY'])
    except Exception as e:
        logging.error('connect rpyc server ERROR:' + str(e))
        return HttpResponse('connect rpyc server ERROR:' + str(e))
def ywgl(request):
    """Ops-management view: run a selected module on selected hosts via rpyc.

    On POST, builds the "@@"-separated command string, encrypts it with
    SECRET_KEY, executes it on the rpyc server and decrypts the result
    into OPresult.
    """
    OPresult = ""
    hostgroup = Hostgroup.objects.all()
    modulelist = ModuleList.objects.all()
    if request.POST:
        memberlist = request.POST.getlist("memberlist", "")
        hosts = ','.join(memberlist)
        # Characters 1-4 of the selected option carry the module id — TODO
        # confirm the option-value format.
        mod_id = request.POST.get("modulelist")[1:5]
        sys_param_1 = request.POST.get("sys_param_1", "")
        sys_param_2 = request.POST.get("sys_param_2", "")
        put_string = mod_id + "@@" + hosts + "@@" + sys_param_1 + "@@" + sys_param_2 + "@@"
        try:
            # SECURITY: host, user and key are hard-coded credentials in
            # source control — move them into settings (cf. settings.RPYC_SET
            # used by module_run in this project).
            conn = rpyc.connect('192.168.2.131', 11511)
            conn.root.login('OMuser', 'KJS23o4ij09gHF734iuhsdfhkGYSihoiwhj38u4h')
        except Exception as e:
            logging.error('connect rpyc server error:' + str(e))
            return HttpResponse('connect rpyc server error:' + str(e))
        # Encrypt the command string, run it remotely, decrypt the result.
        put_string = tencode(put_string, settings.SECRET_KEY)
        OPresult = tdecode(conn.root.Runcommands(put_string), settings.SECRET_KEY)
def ywgl(request):
    """Ops-management view: run a selected module on selected hosts via rpyc.

    On POST, builds the "@@"-separated command string, encrypts it with
    SECRET_KEY, executes it on the rpyc server and decrypts the result.
    """
    OPresult = ""
    hostgroup = Hostgroup.objects.all()
    modulelist = ModuleList.objects.all()
    if request.POST:
        put_string = ""
        memberlist = request.POST.getlist("memberlist", "")
        hosts = ','.join(memberlist)
        # Characters 1-4 of the selected option carry the module id — TODO
        # confirm the option-value format.
        mod_id = request.POST.get("modulelist")[1:5]
        sys_param_1 = request.POST.get("sys_param_1", "")
        sys_param_2 = request.POST.get("sys_param_2", "")
        put_string = mod_id + "@@" + hosts + "@@" + sys_param_1 + "@@" + sys_param_2 + "@@"
        try:
            # SECURITY NOTE(review): hard-coded host and credentials in
            # source control — should come from settings.
            conn = rpyc.connect('192.168.2.131', 11511)
            conn.root.login('OMuser', 'KJS23o4ij09gHF734iuhsdfhkGYSihoiwhj38u4h')
        except Exception, e:
            logging.error('connect rpyc server error:' + str(e))
            return HttpResponse('connect rpyc server error:' + str(e))
        # Encrypt the command string, run it remotely, decrypt the result.
        put_string = tencode(put_string, settings.SECRET_KEY)
        OPresult = tdecode(conn.root.Runcommands(put_string), settings.SECRET_KEY)
class FeedsRresultAdmin(admin.ModelAdmin): actions = ['getArticle', 'getFeed', 'saveArticle'] actions_on_top = True actions_on_bottom = True def getArticle(self, request, queryset, *arg1, **arg2): for feed in queryset: logging.info('start to fetch article,The title is %s', feed.title) try: if feed.feed.start_target != 'nohtml': logging.info('fetch new article %s,at %s' % (feed.link, datetime.now())) contenthtml = '' try: result = getpage(feed.link, 30) if result.code == 200: if len( feed.feed.start_target ) != 0 and feed.feed.start_target != 'nohtml': contenthtml = htmllib.parsehtml( result.read(), feed.feed, feed.link, feed.feed.feedurl) else: contenthtml = feed.excerpt self.__store_article(contenthtml, feed) return True return False except Exception, data: logging.info('DownloadError in get %s.the error is %s', feed.link, data) return False else: self.__store_article(feed.excerpt, feed) except Exception, data: logging.error('the rpc error is %s ', data)