def storecmt(url, content, pubdate, user):
     """Insert one comment for ``url`` unless CMTStorage already has it.

     ``content`` and ``user`` are passed through ``Common.strfilter`` and
     ``pubdate`` through ``TimeUtility.getuniformtime`` first; the existence
     check, the id and the inserted row all use those converted values.
     """
     clean_content = Common.strfilter(content)
     clean_user = Common.strfilter(user)
     uniform_date = TimeUtility.getuniformtime(pubdate)

     # Skip everything when CMTStorage reports this comment as present.
     if CMTStorage.exist(url, clean_content, uniform_date, clean_user):
         return

     message = 'url:{url}, content:{content}, pubdate:{pubdate}, user:{user}'
     Logger.getlogging().debug(
         message.format(url=url,
                        content=clean_content,
                        pubdate=uniform_date,
                        user=clean_user))

     comment_id = CMTStorage.getid(url, clean_content, uniform_date,
                                   clean_user)
     row = {
         SQLDAO.SPIDER_TABLE_COMMENTS_ID: comment_id,
         SQLDAO.SPIDER_TABLE_COMMENTS_URL: url,
         SQLDAO.SPIDER_TABLE_COMMENTS_PUBLISH_DATE: uniform_date,
         SQLDAO.SPIDER_TABLE_COMMENTS_USER: clean_user,
         SQLDAO.SPIDER_TABLE_COMMENTS_CONTENT: clean_content,
         # The creation date is the start time reported by SpiderConfigure.
         SQLDAO.SPIDER_TABLE_COMMENTS_CREATE_DATE:
         SpiderConfigure.getinstance().starttime(),
     }
     SQLDAO.getinstance().insert(
         SQLDAO.SPIDER_TABLE_COMMENTS,
         SQLDAO.SPIDER_TABLE_COMMENTS_KEYS,
         SQLDAO.getvaluesfromkeys(row, SQLDAO.SPIDER_TABLE_COMMENTS_KEYS))
    def seturlinfos(params):
        """Write the page details carried by ``params`` to the news table.

        If ``NewsStorage.exist`` reports a row for ``params.url`` that row is
        updated: the type column is left alone and the publish date is only
        written while the stored value still equals
        ``TimeUtility.getintformtime(0)``. Otherwise a full new row is
        inserted.
        """
        page_url = params.url
        news_id = NewsStorage.getid(page_url)
        is_known = NewsStorage.exist(page_url)

        fields = {}
        if is_known:
            stored = NewsStorage.getdoc(page_url)
        else:
            # The type column is only written when inserting a new row.
            fields[SQLDAO.SPIDER_TABLE_NEWS_TYPE] = params.type

        fields[SQLDAO.SPIDER_TABLE_NEWS_TITLE] = Common.strfilter(
            params.title)
        # The body is skipped when the type is SPIDER_S2_WEBSITE_VIDEO.
        if params.type != constant.SPIDER_S2_WEBSITE_VIDEO:
            fields[SQLDAO.SPIDER_TABLE_NEWS_BODY] = Common.strfilter(
                params.body)

        if is_known:
            # Only overwrite a publish date that is missing or still equal
            # to the getintformtime(0) value.
            stored_pubdate = stored.get(
                SQLDAO.SPIDER_TABLE_NEWS_PUBLISH_DATE,
                TimeUtility.getintformtime(0))
            write_pubdate = stored_pubdate == TimeUtility.getintformtime(0)
        else:
            write_pubdate = True
        if write_pubdate:
            fields[SQLDAO.SPIDER_TABLE_NEWS_PUBLISH_DATE] = (
                TimeUtility.getuniformtime(params.pubtime))

        fields[SQLDAO.SPIDER_TABLE_NEWS_CMTNUM] = params.cmtnum
        fields[SQLDAO.SPIDER_TABLE_NEWS_CLICKNUM] = params.clicknum
        fields[SQLDAO.SPIDER_TABLE_NEWS_FANSNUM] = params.fansnum
        fields[SQLDAO.SPIDER_TABLE_NEWS_VOTENUM] = params.votenum
        fields[SQLDAO.SPIDER_TABLE_NEWS_UPDATE_DATE] = SQLDAO.gettime()

        if is_known:
            SQLDAO.getinstance().update(
                SQLDAO.SPIDER_TABLE_NEWS,
                {SQLDAO.SPIDER_TABLE_NEWS_ID: news_id},
                fields)
            return

        # Columns that are only supplied for a brand-new row.
        fields[SQLDAO.SPIDER_TABLE_NEWS_ID] = news_id
        fields[SQLDAO.SPIDER_TABLE_NEWS_URL] = page_url
        fields[SQLDAO.SPIDER_TABLE_NEWS_QUERY] = params.query
        fields[SQLDAO.SPIDER_TABLE_NEWS_CHANNEL] = params.channel
        fields[SQLDAO.SPIDER_TABLE_NEWS_CREATE_DATE] = params.createtime
        fields[SQLDAO.SPIDER_TABLE_NEWS_MACHINEFLAG] = (
            NewsStorage.LOCALMACHINEFLAG)
        SQLDAO.getinstance().insert(
            SQLDAO.SPIDER_TABLE_NEWS,
            SQLDAO.SPIDER_TABLE_NEWS_KEYS,
            SQLDAO.getvaluesfromkeys(fields))
Пример #3
0
 def storetiebaquery(self,
                     query,
                     queryurl,
                     machineflaglist=MACHINEFLAGLIST_TIEBA):
     """Store a tieba query, or refresh it when it is already stored.

     Args:
         query: Query text; surrounding whitespace is stripped.
         queryurl: URL stored with a newly inserted query; stripped as well.
         machineflaglist: Machine flags passed to ``QueryStorage.find`` when
             looking for an existing record.

     Raises:
         ValueError: From ``min()`` if the query is new and
             ``self.querystorage_tieba`` is empty.
     """
     # If the query already exists, refresh its update time; otherwise pick
     # the machine with the smallest query count and store the query there.
     query = query.strip()
     queryurl = queryurl.strip()
     result = QueryStorage.find(query,
                                machineflaglist,
                                table=SQLDAO.SPIDER_TABLE_QUERYS_TIEBA)
     if result:
         resultdict = SQLDAO.getdictdata(SQLDAO.SPIDER_TABLE_QUERYS_KEYS,
                                         result)
         machine = resultdict[SQLDAO.SPIDER_TABLE_QUERYS_MACHINEFLAG]
         query_id = QueryStorage.getid(query, machine)
         SQLDAO.getinstance().update(
             SQLDAO.SPIDER_TABLE_QUERYS_TIEBA,
             {SQLDAO.SPIDER_TABLE_QUERYS_ID: query_id}, {
                 SQLDAO.SPIDER_TABLE_QUERYS_UPDATEDATE:
                 SpiderConfigure.getinstance().starttime(),
                 SQLDAO.SPIDER_TABLE_QUERYS_VALID:
                 1
             })
     else:
         # items() instead of iteritems(): same result on Python 2, and it
         # keeps working on Python 3 where dict.iteritems() was removed.
         machine = min(self.querystorage_tieba.items(),
                       key=lambda item: item[1])[0]
         data = {
             SQLDAO.SPIDER_TABLE_QUERYS_ID:
             QueryStorage.getid(query, machine),
             SQLDAO.SPIDER_TABLE_QUERYS_QUERY:
             query,
             SQLDAO.SPIDER_TABLE_QUERYS_CREATEDATE:
             SpiderConfigure.getinstance().starttime(),
             SQLDAO.SPIDER_TABLE_QUERYS_UPDATEDATE:
             SpiderConfigure.getinstance().starttime(),
             SQLDAO.SPIDER_TABLE_QUERYS_MACHINEFLAG:
             machine,
             SQLDAO.SPIDER_TABLE_QUERYS_QUERYURL:
             queryurl,
             SQLDAO.SPIDER_TABLE_QUERYS_VALID:
             1
         }
         SQLDAO.getinstance().insert(
             SQLDAO.SPIDER_TABLE_QUERYS_TIEBA,
             SQLDAO.SPIDER_TABLE_QUERYS_KEYS,
             SQLDAO.getvaluesfromkeys(data,
                                      SQLDAO.SPIDER_TABLE_QUERYS_KEYS))
     # Keep the live per-machine storage counter up to date.
     # NOTE(review): this also increments on the update path above, not only
     # for new inserts -- confirm that is intended.
     self.querystorage_tieba[machine] = self.querystorage_tieba.get(
         machine, 0) + 1
 def storeurl(url):
     """Insert a news row for ``url`` unless NewsStorage already has one.

     The row carries the id, the URL, the query, channel and start time
     reported by ``SpiderConfigure``, the local machine flag, and
     ``SQLDAO.gettime()`` as its update date.
     """
     news_id = NewsStorage.getid(url)
     if NewsStorage.exist(url):
         return

     row = {
         SQLDAO.SPIDER_TABLE_NEWS_ID: news_id,
         SQLDAO.SPIDER_TABLE_NEWS_URL: url,
         SQLDAO.SPIDER_TABLE_NEWS_QUERY:
         SpiderConfigure.getinstance().getquery(),
         SQLDAO.SPIDER_TABLE_NEWS_CHANNEL:
         SpiderConfigure.getinstance().getchannel(),
         SQLDAO.SPIDER_TABLE_NEWS_MACHINEFLAG: NewsStorage.LOCALMACHINEFLAG,
         SQLDAO.SPIDER_TABLE_NEWS_CREATE_DATE:
         SpiderConfigure.getinstance().starttime(),
         SQLDAO.SPIDER_TABLE_NEWS_UPDATE_DATE: SQLDAO.gettime(),
     }
     SQLDAO.getinstance().insert(
         SQLDAO.SPIDER_TABLE_NEWS,
         SQLDAO.SPIDER_TABLE_NEWS_KEYS,
         SQLDAO.getvaluesfromkeys(row))
 def seturlinfo(url, key=None, value=None, data=None):
     """Set one field, or a prepared dict of fields, on the news row of ``url``.

     Args:
         url: Page URL; the row id comes from ``NewsStorage.getid(url)``.
         key: Column to set when ``data`` is not given.
         value: New value for ``key``. It is passed through
             ``TimeUtility.getuniformtime`` when ``key`` is the publish-date
             column.
         data: Optional dict of column -> value. When non-empty it is sent
             to ``update`` as is (no existence check, no update-date stamp)
             and ``key``/``value`` are ignored.
     """
     # ``data`` defaults to None instead of a shared mutable ``{}``; only its
     # truthiness is tested, so callers see no difference.
     news_id = NewsStorage.getid(url)
     if data:
         SQLDAO.getinstance().update(SQLDAO.SPIDER_TABLE_NEWS,
                                     {SQLDAO.SPIDER_TABLE_NEWS_ID: news_id},
                                     data)
         return

     if SQLDAO.SPIDER_TABLE_NEWS_PUBLISH_DATE == key:
         value = TimeUtility.getuniformtime(value)

     if NewsStorage.exist(url):
         doc = NewsStorage.getdoc(url)
         # Only touch the database when the stored value actually differs.
         # NOTE(review): with key=None this still issues an update keyed on
         # None -- confirm callers always pass a key or data.
         if doc.get(key, '') != value:
             changes = {
                 key: value,
                 SQLDAO.SPIDER_TABLE_NEWS_UPDATE_DATE: SQLDAO.gettime()
             }
             SQLDAO.getinstance().update(
                 SQLDAO.SPIDER_TABLE_NEWS,
                 {SQLDAO.SPIDER_TABLE_NEWS_ID: news_id},
                 changes)
         return

     # No row yet: insert one that already carries the requested field.
     row = {}
     row[SQLDAO.SPIDER_TABLE_NEWS_ID] = news_id
     row[SQLDAO.SPIDER_TABLE_NEWS_URL] = url
     row[SQLDAO.SPIDER_TABLE_NEWS_QUERY] = SpiderConfigure.getinstance(
     ).getquery()
     row[SQLDAO.SPIDER_TABLE_NEWS_CHANNEL] = SpiderConfigure.getinstance(
     ).getchannel()
     row[SQLDAO.SPIDER_TABLE_NEWS_MACHINEFLAG] = NewsStorage.LOCALMACHINEFLAG
     row[SQLDAO.SPIDER_TABLE_NEWS_CREATE_DATE] = SpiderConfigure.getinstance(
     ).starttime()
     row[key] = value
     row[SQLDAO.SPIDER_TABLE_NEWS_UPDATE_DATE] = SQLDAO.gettime()
     SQLDAO.getinstance().insert(SQLDAO.SPIDER_TABLE_NEWS,
                                 SQLDAO.SPIDER_TABLE_NEWS_KEYS,
                                 SQLDAO.getvaluesfromkeys(row))