Пример #1
0
def parseCmtDict(destCmtDict, srcCmtDict, cmtId):
    """Populate destCmtDict in place from a single source comment record.

    Args:
        destCmtDict: mapping that receives the output fields 'id', 'author',
            'author_url', 'date', 'date_gmt', 'content', 'parent',
            'author_email', 'author_IP', 'approved', 'type' and 'user_id'.
        srcCmtDict: mapping providing 'username', 'userUrl', 'content' and
            'time' (text in "%Y-%m-%d %H:%M" form, e.g. "2012-05-15 11:03").
        cmtId: value stored under 'id'; it is logged with a %d placeholder.

    Returns:
        None; the result is written into destCmtDict.
    """
    outputTimeFormat = "%Y-%m-%d %H:%M:%S"

    destCmtDict['id'] = cmtId

    logging.debug("--- comment[%d] ---", destCmtDict['id'])
    logging.debug("srcCmtDict=%s", srcCmtDict)

    # Fields copied straight from the source record (destination <- source).
    for destKey, srcKey in (('author', 'username'), ('author_url', 'userUrl')):
        destCmtDict[destKey] = srcCmtDict[srcKey]

    # The source time is parsed as-is; the GMT counterpart comes from
    # crifanLib.convertLocalToGmt.
    postedLocal = datetime.strptime(srcCmtDict['time'], "%Y-%m-%d %H:%M")
    postedGmt = crifanLib.convertLocalToGmt(postedLocal)
    destCmtDict['date'] = postedLocal.strftime(outputTimeFormat)
    destCmtDict['date_gmt'] = postedGmt.strftime(outputTimeFormat)

    destCmtDict['content'] = srcCmtDict['content']

    # Fields that always get the same constant value.
    constantFields = (
        ('parent', 0),
        ('author_email', ""),
        ('author_IP', ""),
        ('approved', 1),
        ('type', ""),
        ('user_id', 0),
    )
    for fieldName, fieldValue in constantFields:
        destCmtDict[fieldName] = fieldValue

    # Debug dump of the main fields, read back from destCmtDict.
    debugLines = (
        ("author       =%s", 'author'),
        ("author_url   =%s", 'author_url'),
        ("date         =%s", 'date'),
        ("date_gmt     =%s", 'date_gmt'),
        ("parent       =%s", 'parent'),
        ("content      =%s", 'content'),
    )
    for messageFormat, fieldName in debugLines:
        logging.debug(messageFormat, destCmtDict[fieldName])
Пример #2
0
def parseCmtDict(destCmtDict, srcCmtDict, cmtId):
    """Fill destCmtDict in place from one source comment (unfinished template).

    NOTE: the ``???`` markers below are placeholders, not Python; this
    function does not parse until each one is replaced with real code.

    Args:
        destCmtDict: mapping that receives 'id', 'author', 'author_url',
            'date', 'date_gmt', 'content', 'parent', 'author_email',
            'author_IP', 'approved', 'type' and 'user_id'.
        srcCmtDict: source comment record; it is logged and indexed for the
            author name (the key is still a placeholder).
        cmtId: value stored under 'id'; it is logged with a %d placeholder.

    Returns:
        None; the result is written into destCmtDict.
    """
    global gVal;
    
    destCmtDict['id'] = cmtId;
    #print "destCmtDict['id']=",destCmtDict['id'];
    
    logging.debug("--- comment[%d] ---", destCmtDict['id']);
    logging.debug("srcCmtDict=%s", srcCmtDict);
    
    # TODO: replace '???' with the srcCmtDict key that holds the author name.
    destCmtDict['author'] = srcCmtDict['???'];
    #print "destCmtDict['author']=",destCmtDict['author'];
    
    # TODO: placeholder - supply the value to store as the author's URL.
    destCmtDict['author_url'] = ???;
    #print "destCmtDict['author_url']=",destCmtDict['author_url'];
    
    # TODO: placeholder - must yield an object that has .strftime() and that
    # crifanLib.convertLocalToGmt() accepts (both are used just below).
    localTime = ???;
    #print "localTime=",localTime;
    gmtTime = crifanLib.convertLocalToGmt(localTime);
    #print "gmtTime=",gmtTime;
    destCmtDict['date']     = localTime.strftime("%Y-%m-%d %H:%M:%S");
    destCmtDict['date_gmt'] = gmtTime.strftime("%Y-%m-%d %H:%M:%S");

    # TODO: placeholder - supply the comment body.
    destCmtDict['content'] = ???;
    #print "destCmtDict['content']=",destCmtDict['content']; # some char will raise error for gbk can not show it

    # The remaining fields are always set to these constants.
    destCmtDict['parent'] = 0;
    destCmtDict['author_email'] = "";
    destCmtDict['author_IP'] = "";
    destCmtDict['approved'] = 1;
    destCmtDict['type'] = "";
    destCmtDict['user_id'] = 0;
    
    # Debug dump of the main fields, read back from destCmtDict.
    logging.debug("author       =%s", destCmtDict['author']);
    logging.debug("author_url   =%s", destCmtDict['author_url']);
    logging.debug("date         =%s", destCmtDict['date']);
    logging.debug("date_gmt     =%s", destCmtDict['date_gmt']);
    logging.debug("parent       =%s", destCmtDict['parent']);
    logging.debug("content      =%s", destCmtDict['content']);
    
    #print "-------fill comments %4d OK"%(destCmtDict['id']);

    return ;
Пример #3
0
def _parseCmtTime(cmtTimeStr, now=None):
    """Convert a comment 'PostTime' string into a datetime.

    The forms below are tried in order; the first one found anywhere in
    cmtTimeStr wins, and only the matched text is parsed.

    Args:
        cmtTimeStr: time text taken from the comment record.
        now: reference datetime for the relative forms; defaults to
            datetime.now().

    Returns:
        The datetime described by cmtTimeStr.

    Raises:
        ValueError: if cmtTimeStr matches none of the known forms, or the
            matched digits do not form a valid date/time.
    """
    if now is None:
        now = datetime.now()

    timeFormat = "%Y-%m-%d %H:%M"

    # Absolute form, e.g. "2012-03-19 01:02".
    found = re.search(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}", cmtTimeStr)
    if found:
        return datetime.strptime(found.group(0), timeFormat)

    # Backslashes in the u"..." patterns are doubled so the literals are
    # valid on both Python 2 and Python 3 (no invalid "\d" escape).

    # "刚刚" (just now).
    if re.search(u"刚刚", cmtTimeStr):
        return now

    # "1分钟前" (N minutes ago).
    found = re.search(u"(?P<minutes>\\d+)分钟前", cmtTimeStr)
    if found:
        return now - timedelta(minutes=int(found.group("minutes")))

    # "3小时前" (N hours ago).
    found = re.search(u"(?P<hours>\\d+)小时前", cmtTimeStr)
    if found:
        return now - timedelta(hours=int(found.group("hours")))

    # Day-offset forms that carry an explicit HH:MM:
    #   "昨天 20:01"  yesterday                 -> 1 day back
    #   "前天 15:12"  the day before yesterday  -> 2 days back
    #   "3天前 12:50" N days ago                -> N days back
    dayForms = (
        (u"昨天 (?P<hhmm>\\d{2}:\\d{2})", 1),
        (u"前天 (?P<hhmm>\\d{2}:\\d{2})", 2),
        (u"(?P<prevDays>\\d+)天前 (?P<hhmm>\\d{2}:\\d{2})", None),
    )
    for pattern, fixedDays in dayForms:
        found = re.search(pattern, cmtTimeStr)
        if not found:
            continue
        if fixedDays is None:
            daysBack = int(found.group("prevDays"))
        else:
            daysBack = fixedDays
        dayStr = (now - timedelta(days=daysBack)).strftime("%Y-%m-%d")
        return datetime.strptime(dayStr + " " + found.group("hhmm"), timeFormat)

    # Fail with the offending text instead of letting a None time reach
    # the caller's .strftime() call.
    raise ValueError("unrecognized comment time: %r" % (cmtTimeStr,))


def parseCmtDict(destCmtDict, srcCmtDict, cmtId, cmtIdRelationDict):
    """Populate destCmtDict in place from a single source comment record.

    Args:
        destCmtDict: mapping that receives 'id', 'author', 'author_url',
            'date', 'date_gmt', 'content', 'parent', 'author_email',
            'author_IP', 'approved', 'type' and 'user_id'.
        srcCmtDict: source record; the keys 'CommentId', 'UserName',
            'PostTime', 'Content' and 'ParentId' are read.
        cmtId: new id for this comment; stored under 'id'.
        cmtIdRelationDict: mapping of original comment id -> new comment id.
            This comment is added to it, and it is consulted to translate
            'ParentId' into the parent's new id.

    Returns:
        None; the result is written into destCmtDict.

    Raises:
        ValueError: if srcCmtDict['PostTime'] is in no recognized format.
    """
    destCmtDict['id'] = cmtId

    logging.debug("--- comment[%d] ---", destCmtDict['id'])
    logging.debug("srcCmtDict=%s", srcCmtDict)

    # Remember original id -> new id so later replies can find their parent.
    cmtIdRelationDict[srcCmtDict['CommentId']] = destCmtDict['id']

    destCmtDict['author'] = srcCmtDict['UserName']
    # NOTE(review): cmtUserUrl is not assigned anywhere in this function.
    # The commented-out line that used to sit here was garbled and ended in
    # '"/" + srcCmtDict['UserName']', so the assignment was presumably
    # '<base url> + "/" + srcCmtDict['UserName']'. Restore it (or define
    # cmtUserUrl at module level); otherwise this line raises NameError.
    destCmtDict['author_url'] = cmtUserUrl

    localTime = _parseCmtTime(srcCmtDict['PostTime'])
    gmtTime = crifanLib.convertLocalToGmt(localTime)
    destCmtDict['date'] = localTime.strftime("%Y-%m-%d %H:%M:%S")
    destCmtDict['date_gmt'] = gmtTime.strftime("%Y-%m-%d %H:%M:%S")

    destCmtDict['content'] = srcCmtDict['Content']

    # A reply points at its parent's new id; anything else gets parent 0.
    parentId = srcCmtDict['ParentId']
    if parentId in cmtIdRelationDict:
        destCmtDict['parent'] = cmtIdRelationDict[parentId]
        gVal['dbgSubCmtNum'] += 1
    else:
        destCmtDict['parent'] = 0

    destCmtDict['author_email'] = ""
    destCmtDict['author_IP'] = ""
    destCmtDict['approved'] = 1
    destCmtDict['type'] = ""
    destCmtDict['user_id'] = 0

    logging.debug("author       =%s", destCmtDict['author'])
    logging.debug("author_url   =%s", destCmtDict['author_url'])
    logging.debug("date         =%s", destCmtDict['date'])
    logging.debug("date_gmt     =%s", destCmtDict['date_gmt'])
    logging.debug("parent       =%s", destCmtDict['parent'])
    logging.debug("content      =%s", destCmtDict['content'])
Пример #4
0
def _parseCmtTime(cmtTimeStr, now=None):
    """Convert a comment 'PostTime' string into a datetime.

    The forms below are tried in order; the first one found anywhere in
    cmtTimeStr wins, and only the matched text is parsed.

    Args:
        cmtTimeStr: time text taken from the comment record.
        now: reference datetime for the relative forms; defaults to
            datetime.now().

    Returns:
        The datetime described by cmtTimeStr.

    Raises:
        ValueError: if cmtTimeStr matches none of the known forms, or the
            matched digits do not form a valid date/time.
    """
    if now is None:
        now = datetime.now()

    timeFormat = "%Y-%m-%d %H:%M"

    # Absolute form, e.g. "2012-03-19 01:02".
    found = re.search(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}", cmtTimeStr)
    if found:
        return datetime.strptime(found.group(0), timeFormat)

    # Backslashes in the u"..." patterns are doubled so the literals are
    # valid on both Python 2 and Python 3 (no invalid "\d" escape).

    # "刚刚" (just now).
    if re.search(u"刚刚", cmtTimeStr):
        return now

    # "1分钟前" (N minutes ago).
    found = re.search(u"(?P<minutes>\\d+)分钟前", cmtTimeStr)
    if found:
        return now - timedelta(minutes=int(found.group("minutes")))

    # "3小时前" (N hours ago).
    found = re.search(u"(?P<hours>\\d+)小时前", cmtTimeStr)
    if found:
        return now - timedelta(hours=int(found.group("hours")))

    # Day-offset forms that carry an explicit HH:MM:
    #   "昨天 20:01"  yesterday                 -> 1 day back
    #   "前天 15:12"  the day before yesterday  -> 2 days back
    #   "3天前 12:50" N days ago                -> N days back
    dayForms = (
        (u"昨天 (?P<hhmm>\\d{2}:\\d{2})", 1),
        (u"前天 (?P<hhmm>\\d{2}:\\d{2})", 2),
        (u"(?P<prevDays>\\d+)天前 (?P<hhmm>\\d{2}:\\d{2})", None),
    )
    for pattern, fixedDays in dayForms:
        found = re.search(pattern, cmtTimeStr)
        if not found:
            continue
        if fixedDays is None:
            daysBack = int(found.group("prevDays"))
        else:
            daysBack = fixedDays
        dayStr = (now - timedelta(days=daysBack)).strftime("%Y-%m-%d")
        return datetime.strptime(dayStr + " " + found.group("hhmm"), timeFormat)

    # Fail with the offending text instead of letting a None time reach
    # the caller's .strftime() call.
    raise ValueError("unrecognized comment time: %r" % (cmtTimeStr,))


def parseCmtDict(destCmtDict, srcCmtDict, cmtId, cmtIdRelationDict):
    """Populate destCmtDict in place from a single source comment record.

    Args:
        destCmtDict: mapping that receives 'id', 'author', 'author_url',
            'date', 'date_gmt', 'content', 'parent', 'author_email',
            'author_IP', 'approved', 'type' and 'user_id'.
        srcCmtDict: source record; the keys 'CommentId', 'UserName',
            'PostTime', 'Content' and 'ParentId' are read.
        cmtId: new id for this comment; stored under 'id'.
        cmtIdRelationDict: mapping of original comment id -> new comment id.
            This comment is added to it, and it is consulted to translate
            'ParentId' into the parent's new id.

    Returns:
        None; the result is written into destCmtDict.

    Raises:
        ValueError: if srcCmtDict['PostTime'] is in no recognized format.
    """
    destCmtDict['id'] = cmtId

    logging.debug("--- comment[%d] ---", destCmtDict['id'])
    logging.debug("srcCmtDict=%s", srcCmtDict)

    # Remember original id -> new id so later replies can find their parent.
    cmtIdRelationDict[srcCmtDict['CommentId']] = destCmtDict['id']

    destCmtDict['author'] = srcCmtDict['UserName']
    # NOTE(review): cmtUserUrl is not assigned anywhere in this function.
    # The commented-out line that used to sit here was garbled and ended in
    # '"/" + srcCmtDict['UserName']', so the assignment was presumably
    # '<base url> + "/" + srcCmtDict['UserName']'. Restore it (or define
    # cmtUserUrl at module level); otherwise this line raises NameError.
    destCmtDict['author_url'] = cmtUserUrl

    localTime = _parseCmtTime(srcCmtDict['PostTime'])
    gmtTime = crifanLib.convertLocalToGmt(localTime)
    destCmtDict['date'] = localTime.strftime("%Y-%m-%d %H:%M:%S")
    destCmtDict['date_gmt'] = gmtTime.strftime("%Y-%m-%d %H:%M:%S")

    destCmtDict['content'] = srcCmtDict['Content']

    # A reply points at its parent's new id; anything else gets parent 0.
    parentId = srcCmtDict['ParentId']
    if parentId in cmtIdRelationDict:
        destCmtDict['parent'] = cmtIdRelationDict[parentId]
        gVal['dbgSubCmtNum'] += 1
    else:
        destCmtDict['parent'] = 0

    destCmtDict['author_email'] = ""
    destCmtDict['author_IP'] = ""
    destCmtDict['approved'] = 1
    destCmtDict['type'] = ""
    destCmtDict['user_id'] = 0

    logging.debug("author       =%s", destCmtDict['author'])
    logging.debug("author_url   =%s", destCmtDict['author_url'])
    logging.debug("date         =%s", destCmtDict['date'])
    logging.debug("date_gmt     =%s", destCmtDict['date_gmt'])
    logging.debug("parent       =%s", destCmtDict['parent'])
    logging.debug("content      =%s", destCmtDict['content'])