示例#1
0
 def reload_handler(self, signum, frame):
     """Re-read the maps config and publish it on ``base.url_maps``.

     Takes the ``(signum, frame)`` pair that signal handlers receive;
     neither argument is used. ``base.url_maps`` is only replaced when
     ``get_Maps()`` returns a truthy value, so a failed reload leaves the
     previously loaded maps in place.
     """
     fresh_maps = get_Maps()
     if not fresh_maps:
         # Falsy result: keep the current maps and just report the failure.
         print('reload the maps config file failed ...')
         return

     base.url_maps = fresh_maps
     print('reload the maps config file ok ...')
     print(base.url_maps)
示例#2
0
 def reload_handler(self, signum, frame):
     """Reload the maps config file and swap it into ``base.url_maps``.

     ``signum`` and ``frame`` are accepted so the method has the shape of a
     signal handler, but they are not read. The swap happens only when
     ``get_Maps()`` yields a truthy result; otherwise the existing
     ``base.url_maps`` is left untouched and a failure line is printed.
     """
     reloaded = get_Maps()
     succeeded = bool(reloaded)

     if succeeded:
         base.url_maps = reloaded
         print('reload the maps config file ok ...')
         # Echo the maps now in effect.
         print(base.url_maps)
         return

     print('reload the maps config file failed ...')
示例#3
0
    def Rules(self):
        """Register the 'Parse_json' rule on top of the Redis-backed stores.

        Builds the ``url_list`` queue and the ``crawled_set`` record on the
        connection returned by ``getRedis()``, loads the URL maps into
        ``base.url_maps``, installs ``self.reload_handler`` for signal
        number 60, and finally hands both stores to ``self.AddRules``.
        """
        redis_conn = getRedis()

        pending_urls = DQueue(redis_conn, 'url_list')
        crawled_urls = Record(redis_conn, 'crawled_set')

        # Load the maps once up front; the handler below replaces them later.
        base.url_maps = get_Maps()
        # NOTE(review): 60 is a raw, platform-specific signal number --
        # confirm which signal is intended.
        signal.signal(60, self.reload_handler)

        rule_sources = {}
        rule_sources['url'] = pending_urls
        rule_sources['url_set'] = crawled_urls

        # NOTE(review): the meaning of the trailing ('url', 10) arguments is
        # defined by AddRules, which is not visible here.
        self.AddRules(rule_sources, 'Parse_json', 'url', 10)
示例#4
0
    def Rules(self):
        """Seed the shared URL queue and register one 'Parse_url' rule per store.

        Steps, in order:

        1. Open the Redis connection (``getRedis(2)``) and run
           ``BaseDb().connectdb()`` / ``getAllCategorys()``.
           NOTE(review): presumably this is what fills
           ``base.category_ids`` -- confirm against ``BaseDb``.
        2. If the shared ``url_news`` queue is empty when this method
           starts, push one search URL per (store, category) pair for
           every store whose URL layout is known.
        3. For each entry of ``drugstoreurl``: create its ``Record``,
           reload ``base.url_maps``, install ``self.reload_handler`` for
           signal number 60 and call ``self.AddRules``.

        Each element of ``base.category_ids`` must provide ``'name'`` and
        ``'id'`` keys.
        """
        # Search-URL layout per store. The seed URL is always
        #     store + <before name> + item['name'] + <after name> + str(item['id'])
        # The 'catagoryid' spelling is part of the emitted URL and is kept
        # exactly as it was.
        # NOTE(review): the ehaoyao entry yields '.../search/search/<name>';
        # confirm that the doubled path segment is intended.
        seed_layouts = {
            'http://search.jianke.com/prod':
                ('?wd=', '&catagoryid='),
            'http://www.jxdyf.com/search':
                ('/', '.html?catagoryid='),
            'http://search.360kad.com':
                ('?pageText=', '&catagoryid='),
            'http://www.ehaoyao.com/search':
                ('/search/', '?catagoryid='),
            'http://www.yaofang.cn/n/public/search':
                ('?s_words=', '&sort=interrelated&catagoryid='),
        }

        linkbase = getRedis(2)
        db = BaseDb()
        db.connectdb()
        db.getAllCategorys()

        category_links = Categoryids(linkbase)
        url_list = DQueue(linkbase, 'url_news')

        # Decide once, before the loop, whether the queue needs seeding.
        # The queue is shared by all stores, so re-checking its length per
        # store meant that as soon as the first store pushed anything every
        # later store was skipped and never seeded.
        needs_seeding = url_list.len() == 0

        for store in drugstoreurl:
            url_set = Record(linkbase, store)

            # A store without a known layout is not seeded. Previously such a
            # store either raised on the unbound ``url`` or re-pushed the URL
            # left over from the previous iteration.
            layout = seed_layouts.get(store)
            if needs_seeding and layout is not None:
                before_name, after_name = layout
                for item in base.category_ids:
                    url = (store + before_name + item['name']
                           + after_name + str(item['id']))
                    url_list.push(url)

            # Kept inside the loop, as before: the maps are reloaded and the
            # handler re-installed for every store.
            base.url_maps = get_Maps()
            # NOTE(review): 60 is a raw, platform-specific signal number --
            # confirm which signal is intended.
            signal.signal(60, self.reload_handler)

            rule_sources = {
                'url': url_list,
                'url_set': url_set,
                'category_links': category_links,
            }
            self.AddRules(rule_sources, 'Parse_url', 'url', 10)
示例#5
0
    def Rules(self):
        """Register the 'Parse_url' rule on top of the Redis-backed stores.

        Creates the ``url_list`` queue and the ``crawled_set`` record on the
        connection from ``getRedis()``, stores the result of ``get_Maps()``
        in ``base.url_maps``, binds ``self.reload_handler`` to signal number
        60, then passes both stores to ``self.AddRules``.
        """
        connection = getRedis()

        queue = DQueue(connection, 'url_list')
        seen = Record(connection, 'crawled_set')

        # Initial load of the maps; the handler installed next swaps in a
        # fresh copy whenever it runs.
        base.url_maps = get_Maps()
        # NOTE(review): 60 is a raw, platform-specific signal number --
        # confirm which signal is intended.
        signal.signal(60, self.reload_handler)

        stores = {'url': queue, 'url_set': seen}

        # NOTE(review): the meaning of the trailing ('url', 10) arguments is
        # defined by AddRules, which is not visible here.
        self.AddRules(stores, 'Parse_url', 'url', 10)