示例#1
0
def setPrimo():
    """Rank the current page's entities into "primo" slots.

    Takes no parameters.  Works entirely on module state:
    ``infoModule.info.page`` ('title' and 'outline'),
    ``infoModule.info.entityList``, ``infoModule.info.source`` and the
    database link in ``infoModule.info.site['dblink']``.

    Steps:
      1. Highlight title + outline with the entity list and, for every
         ``[celeb url=...]`` tag produced, look the url up in
         db_topics.celebs and bump that entity's 'frequency'.
      2. Hand out the ranks 'Y', '2', '3', '4' ('N' marks "not ranked"):
         a. the entity tied to the source (source['celeb_id'] > 0) gets
            the first rank;
         b. then entities in ascending 'position' order, stopping at the
            first one whose position is greater than len(title);
         c. then still-unranked entities with frequency > 1, highest
            frequency first;
         d. if nothing was ranked at all, the first visible entity by
            position gets 'Y'.
      Entities whose entityLib 'visibility' is 'invisible' are never
      ranked in steps b-d.

    Returns None.  Logs and returns early when the outline or title is
    missing/empty or when the entity list is empty.
    """
    page = infoModule.info.page
    entityList = infoModule.info.entityList
    source = infoModule.info.source

    if 'outline' not in page or page['outline'] == '':
        log.plog('no outline set before setPrimo called', 5)
        return

    if 'title' not in page or page['title'] == '':
        log.plog('no title set before setPrimo called', 5)
        return

    if len(entityList) == 0:
        log.plog('no entities set before setPrimo called', 5)
        return

    # highlight text as a way to count the frequency: every tag emitted by
    # the highlighter counts as one occurrence of the tagged entity
    highlightedText = highlightEntities.highlightEntitiesFromList(
        page['title'] + ' ' + page['outline'], entityList)
    for taggedEntity in re.finditer(r'\[celeb url=(.+?)\]', highlightedText):
        lookupUrl = taggedEntity.group(1)
        # escape backslashes and quotes so an odd lookupUrl cannot break out
        # of the string literal (same quoting convention the file uses
        # elsewhere when building SQL)
        safeLookupUrl = lookupUrl.replace('\\', '\\\\').replace("'", "\\'")
        sql = ("select celeb_id from db_topics.celebs where lookupUrl='" +
               safeLookupUrl + "'")
        entityByLookupQ = mysql_tools.mysqlQuery(
            sql, infoModule.info.site['dblink'])
        while True:
            entityByLookup = entityByLookupQ.fetch_row(1, 1)
            if entityByLookup == ():
                break
            celebId = entityByLookup[0]['celeb_id']
            if celebId not in entityList:
                # the db knows this url but the entity is not part of this
                # story's list; nothing to count
                log.plog(
                    'celeb_id %s for lookupUrl %s not in entityList, skipping'
                    % (celebId, lookupUrl), 3)
                continue
            # frequency may still be None (the ranking code below allows for
            # that), so treat None like 0 instead of crashing on None += 1
            entityList[celebId]['frequency'] = (
                entityList[celebId]['frequency'] or 0) + 1

    # now frequency is set, first two are based on position, next two based
    # on frequency
    primoTypes = ['Y', '2', '3', '4', 'N']
    primoTypeCursor = 0

    # if source is associated with entity, it's auto-set to primo
    sourceEntityKey = None
    if ('celeb_id' in source and source['celeb_id'] != ''
            and int(source['celeb_id']) > 0):
        if source['celeb_id'] in entityList:
            sourceEntityKey = source['celeb_id']
            entityList[sourceEntityKey]['primo'] = primoTypes[primoTypeCursor]
            primoTypeCursor += 1
        else:
            log.plog(
                'source celeb_id %s not in entityList, not setting primo'
                % source['celeb_id'], 5)

    # [entity key, position] pairs, earliest position first
    posArray = [[eKey, entityList[eKey]['position']]
                for eKey in entityList.keys()
                if entityList[eKey]['position'] is not None]
    posArray.sort(key=lambda x: x[1])

    # [entity key, frequency] pairs for entities seen more than once,
    # most frequent first
    freqArray = [[eKey, entityList[eKey]['frequency']]
                 for eKey in entityList.keys()
                 if entityList[eKey]['frequency'] is not None
                 and entityList[eKey]['frequency'] > 1]
    freqArray.sort(key=lambda x: x[1], reverse=True)

    # primo set by position in title
    # NOTE(review): assumes 'position' is an offset into title + outline, so
    # position <= len(title) means "inside the title" -- confirm with the
    # code that fills in 'position'
    titleLen = len(page['title'])
    for entityKey, position in posArray:
        if primoTypeCursor >= 4 or position > titleLen:
            break
        # the source entity already holds the top rank; re-ranking it here
        # would demote it and leave the story without a 'Y' entity
        if sourceEntityKey is not None and entityKey == sourceEntityKey:
            continue
        ## only non-hidden entities can be primo
        if entityLib.entityLibrary(entityKey, 'visibility') != 'invisible':
            log.plog(
                "entity %s found in title, setting to primo %s" %
                (entityKey, primoTypes[primoTypeCursor]), 3)
            entityList[entityKey]['primo'] = primoTypes[primoTypeCursor]
            primoTypeCursor += 1

    # primo set by frequency
    for entityKey, _frequency in freqArray:
        if primoTypeCursor >= 4:
            break
        # skip anything that was already ranked above
        if entityList[entityKey]['primo'] != 'N':
            continue
        ## only non-hidden entities can be primo
        if entityLib.entityLibrary(entityKey, 'visibility') != 'invisible':
            log.plog(
                "entity %s has high frequency, setting to primo %s" %
                (entityKey, primoTypes[primoTypeCursor]), 3)
            entityList[entityKey]['primo'] = primoTypes[primoTypeCursor]
            primoTypeCursor += 1

    # if nothing found, go to first position and set it to primo
    if primoTypeCursor == 0:
        for entityKey, _position in posArray:
            ## only non-hidden entities can be primo
            if entityLib.entityLibrary(entityKey,
                                       'visibility') != 'invisible':
                log.plog(
                    "entity %s found first in story, setting to primo %s" %
                    (entityKey, primoTypes[primoTypeCursor]), 3)
                entityList[entityKey]['primo'] = primoTypes[primoTypeCursor]
                break
示例#2
0
def addStory():
    siteDB = infoModule.info.site['database']
    if 'debug_mode' in infoModule.info.site and infoModule.info.site['debug_mode'] == True:
        debugMode = True
    else:
        debugMode = False

    if 'title' not in infoModule.info.page or infoModule.info.page['title'] == '':
        log.plog('addStory called without title', 5)
        return False

    if 'publish_immediately' in infoModule.info.source and infoModule.info.source['publish_immediately'] == '1':
        #if the feed always goes live, as in celeb blogs, then set status to A and story table to subs
        storyTable = "subs"
        newsroomonStoryTable = 'newsroom'
        storyStatus = "A"
        #$storyUser = $source['user_alias'
        storyUser = -1
        storyScore = 1
    else:
        storyTable = "newsroom"
        nonStoryTable = 'subs'
        storyStatus = "A"
        storyUser = -1
        storyScore = 0

    if 'featured_source' in infoModule.info.source and infoModule.info.source['featured_source'] == '1':
        topPick = 'true'
        infoModule.info.site['overrideImageMinSize'] = True
    else:
        topPick = 'false'
    
    vtid = 0
    if 'vthumb' in infoModule.info.page and infoModule.info.page['vthumb'] != '':
        #there's a video, go get thumb
        
        mysql_tools.mysqlQuery("insert into " + siteDB + ".images set credit=''", infoModule.info.site['dblink'])
        # get the image id for the subs table
        vtid = infoModule.info.site['dblink'].insert_id()
        escImage = urllib.quote(infoModule.info.page['vthumb'])
        imageFetchingSocket = urllib.urlopen(infoModule.info.site['imageReceiver'] + '/imageFetcher.php?image_id=' + str(vtid) + '&type=video&url=' + escImage)
        imageInsert = imageFetchingSocket.read()
        log.plog(imageInsert, 2)

    # strip multi-byte chars, log differences"
    outline_before_replace = infoModule.info.page['outline']
    
    infoModule.info.page['outline'] = encoding.convertToAscii(infoModule.info.page['outline'])
    infoModule.info.page['title'] = encoding.convertToAscii(infoModule.info.page['title'])

    if outline_before_replace != infoModule.info.page['outline']:
        log.plog('multi-byte char(s) replaced', 3)
        
    if 'source_id' not in infoModule.info.source:
        infoModule.info.source['source_id'] = '0'

    #sanity checks
    #url must have http://
    if infoModule.info.page['url'][0:7] != 'http://' and infoModule.info.page['url'][0:8] != 'https://':
        log.plog("relative url, no host! " + infoModule.info.page['url'], 2)
        return False
        
    #mysql safe
    outline = infoModule.info.page['outline'].replace("'", "\\'")
    title = infoModule.info.page['title'].replace("'", "\\'")
    title = title.strip()
    if 'author' in infoModule.info.page:
        author = infoModule.info.page['author'].replace("'", "\\'")
    else:
        author = ''

    if 'vlink' not in infoModule.info.page or infoModule.info.page['vlink'] == None:
        vlink = ''
    else:
        vlink = infoModule.info.page['vlink']

    if 'sourceType' not in infoModule.info.page:
        sourceType = ''
    else:
        sourceType = infoModule.info.page['sourceType']

    if 'promoter' in infoModule.info.page:
        promoter = infoModule.info.page['promoter']
        if promoter == '':
            promoter = -1
    else:
        promoter = -1


    # do something special with blind sources
    if str(infoModule.info.source['source_id']) == '0':
        #if source title is not set, set it.
        infoModule.info.source['title'] = deduceSourceTitle(infoModule.info.page['url'])
        if infoModule.info.source['title'] == '':
            log.plog('ERROR could not deduce unknown source title', 5)
            os._exit(0)
        log.plog('splitting url into parts for url_regex: %s' % infoModule.info.page['url'], 2)
        URLParts = urlparse.urlparse(infoModule.info.page['url'])
        myHost = URLParts[1]
        myPath = URLParts[2]
        #if there is source info (site_url, url_regex, title) and no source
        infoModule.info.source['url_regex'] = myHost
        log.plog('setting url_regex for new source to ' + myHost, 2)
        log.plog("select * from " + siteDB + ".sources where url_regex='" + infoModule.info.source['url_regex'] + "'", 2)
        checkSourceQ = mysql_tools.mysqlQuery("select * from " + siteDB + ".sources where url_regex='" + infoModule.info.source['url_regex'] + "'", infoModule.info.site['dblink'])
        if checkSourceQ.num_rows() == 0:
            matches = re.search('^(.*?):', infoModule.info.source['title'])
            if matches != None:
                infoModule.info.source['title'] = matches.group(1)

            log.plog('adding new source', 3)
            sql = "insert into " + siteDB + ".sources set title='" +  infoModule.info.source['title'].replace("'", "\\'") + "', url_regex='" + infoModule.info.source['url_regex'] + "', site_url='http://" + myHost + "', machine_generated = true"
            if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
                mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

            log.plog(sql, 2)
            #associate new source with new sub
            newSourceId = infoModule.info.site['dblink'].insert_id()
            infoModule.info.source['source_id'] = str(newSourceId)
        else:
            foundSourceID = checkSourceQ.fetch_row(1,1)
            infoModule.info.source['source_id'] = foundSourceID[0]['source_id']
    if int(infoModule.info.source['source_id']) == '0':
        log.plog('ERROR source_id is 0', 5)
        os._exit(0)
        
    if len(infoModule.info.entityList) == 0:
        log.plog('ERROR no entities in story', 5)
        os._exit(0)
     
    sql = "insert into " + siteDB + "."+ storyTable + " set user_id='" + str(storyUser) + "', sdatetime=now(), firstPosted=now(), title='" + title + "', outline='" + outline + "',url='" + infoModule.info.page['url'] + "', status='" + storyStatus + "', score=" + str(storyScore) + ", vlink='" + vlink + "', vthumb=" + str(vtid) + ", source_id=" + str(infoModule.info.source['source_id']) + ", sourceType='" + sourceType + "', celebrifier='" + str(promoter) + "', topPick=" + topPick + ", title_source='" + str(infoModule.info.source['source_id']) + " " + title + "', author='" + author + "'"
    log.plog(sql, 2)
    if 'debug_mode' in infoModule.info.site and infoModule.info.site['debug_mode'] == True:
        log.plog("Debug mode.  Not inserting: " + sql, 2)
        newSubId = 0
    else:
        res = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
        if res == False:
            return False
        newSubId = infoModule.info.site['dblink'].insert_id()
        if newSubId == 0:
            log.plog('error inserting new story', 5)
            return False


    #record the last story entered for debugging purposes
   
    sql = "update " + siteDB + ".sources set last_story_date=now(), last_story_id=" + str(newSubId) + " where source_id=" + str(infoModule.info.source['source_id'])
    if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
        mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

    if storyTable == 'subs' and len(vlink) > 0 and 'videoStory' in infoModule.info.page and infoModule.info.page['videoStory'] != True:    
        log.plog("creating video-only story", 2)
        tempCelebList = infoModule.info.entityList
        #create_video_story.createVideoStory($entry['vlink'], $tempCelebList, $source)	

    sql = "update " + siteDB + ".relatedSubs set sub_id2 = " + str(newSubId) + " where sub_id2 = -1 and table2='" + storyTable + "'"
    if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
        mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

    try:
        vurls = infoModule.info.site['vurls']
        for i in range(len(vurls)):
            #if promoter > 0, then this is from an outbound link
            log.plog("PROMOTER: " + str(promoter), 2)
            promoter = vurls[i]['promoter']
            if int(promoter) > 0:
                sql = "insert into " + siteDB + ".linkHistory set sub_id=" + str(newSubId) + ", linker_id=-1, feedIdx=" + str(promoter) + ", linkerURL='" + vurls[i]['submittingURL'] + "', linkedOn=now(), userVote=false"
                if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
                    mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
    except:
        pass

    #complete any linkhistory records
    sql = "update " + siteDB + ".linkHistory set linker_id=" + str(newSubId) + " where linker_id=-1 and feedIdx=" + str(infoModule.info.source['source_id']) + " and linkerURL='" + infoModule.info.page['url'] + "'"
    if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
        mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
    
    ## experimental.  for sportifi, try finding team from city and athletes
    #if infoModule.info.site['addTeamFromCity']:
    #    getTeamFromCity()
    ## add entities
    totalEntitiesAdded = 0
    entityForImage = 0
    entityForImageRank = dict({'Y':1, '2':2, '3':3, '4':4})
    lastEntityForImageRank = 5
    #finalEkeys holds the list of entities that are ACTUALLY in the story
    finalEkeys = []
    entKeys = infoModule.info.entityList.keys()
    for eKey in entKeys:
        videoPoints = 0
        picPoints = 0
        
        if infoModule.info.entityList[eKey]['primo'] != 'N':
            if entityForImageRank[infoModule.info.entityList[eKey]['primo']] < lastEntityForImageRank:
                entityForImage = eKey
                lastEntityForImageRank = entityForImageRank[infoModule.info.entityList[eKey]['primo']]
            totalEntitiesAdded += 1
            sql = "insert into " + siteDB + "." + storyTable + "_celebs set celeb_id=" + str(eKey) + ", sub_id=" + str(newSubId) + ", primo='" + infoModule.info.entityList[eKey]['primo'] + "'"
            finalEkeys.append(eKey)

            log.plog(sql, 2)
            if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
                mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
            ## stats
            statsExistsQ = mysql_tools.mysqlQuery("select celeb_id from " + siteDB + ".celebStats where celeb_id=" + str(eKey), infoModule.info.site['dblink'])
            if vlink != '':
                videoPoints = 1
            if 'overrideImageMinSize' not in infoModule.info.site:
                infoModule.info.site['overrideImageMinSize'] = False
            if 'imageMinSize' not in infoModule.info.site:
                infoModule.info.site['imageMinSize'] = 0
            if 'maxSize' not in infoModule.info.page:
                infoModule.info.page['maxSize'] = 0
            if (infoModule.info.site['overrideImageMinSize'] and infoModule.info.page['maxSize'] > 0) or infoModule.info.page['maxSize'] > infoModule.info.site['overrideImageMinSize']:
                picPoints = 1
            
            if statsExistsQ.num_rows() > 0:
                sql = "update " + siteDB + ".celebStats set photos = photos + " + str(picPoints) + ", videos = videos + " + str(videoPoints) + ", stories = stories + 1 where celeb_id = " + str(eKey)
            else:                       
                sql = "insert into " + siteDB + ".celebStats set photos = photos + " + str(picPoints) + ", videos = videos + " + str(videoPoints) + ", stories = stories + 1, celeb_id = " + str(eKey)

            #log.plog(sql, 2)
            if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
                mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

        #for weighted stats.  Weighted stats depend on how primo the 
        #entity is.
        storiesWeighted = 1
        if infoModule.info.entityList[eKey]['primo'] == 'Y':
            storiesWeighted = 10
        elif infoModule.info.entityList[eKey]['primo'] == '2':
            storiesWeighted = 7
        elif infoModule.info.entityList[eKey]['primo'] == '3':
            storiesWeighted = 5
        elif infoModule.info.entityList[eKey]['primo'] == '4':
            storiesWeighted = 3
        #break up stats time by hourly blocks
        hourBlock = int(math.floor(time.time() / 3600))       
        statsExistsQ = mysql_tools.mysqlQuery("select celeb_id from " + siteDB + ".celebHourlyStats where celeb_id=" + str(eKey) + " and hourBlock = " + str(hourBlock), infoModule.info.site['dblink'])
        if statsExistsQ.num_rows() > 0:
            sql = "update " + siteDB + ".celebHourlyStats set photos = photos + " + str(picPoints) + ", videos = videos + " + str(videoPoints) + ", stories = stories + 1, storiesWeighted = storiesWeighted  + " + str(storiesWeighted) + " where celeb_id = " + str(eKey) + " and hourBlock = " + str(hourBlock)
        else:
            sql = "insert into " + siteDB + ".celebHourlyStats set photos = " + str(picPoints) + ", videos = " + str(videoPoints) + ", stories = 1, storiesWeighted = " + str(storiesWeighted) + ",  celeb_id = " + str(eKey) + ", hourBlock = " + str(hourBlock)

        # now do stats by day
        dayBlock = int(math.floor(time.time() / 86400))
        dayStatsExistsQ = mysql_tools.mysqlQuery("select celeb_id from " + siteDB + ".celebDailyStats where celeb_id=" + str(eKey) + " and dayBlock = " + str(hourBlock), infoModule.info.site['dblink'])
        if dayStatsExistsQ.num_rows() > 0:
            sql = "update " + siteDB + ".celebDailyStats set stories = stories + 1, storiesWeighted = storiesWeighted  + " + str(storiesWeighted) + " where celeb_id = " + str(eKey) + " and dayBlock = " + str(hourBlock)
        else:
            sql = "insert into " + siteDB + ".celebDailyStats set stories = 1, storiesWeighted = " + str(storiesWeighted) + ",  celeb_id = " + str(eKey) + ", dayBlock = " + str(hourBlock)

        #log.plog(sql, 2)
        if storyTable == 'subs':
            #don't count stories towards stats if they are in newsroom
            if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
                mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
                #update celeb popularity 
        if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
            sql = "update db_topics.celebs set popularity=popularity+1 where celeb_id=" + str(eKey)
            mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])


    freqArray = []
    log.plog('Entities:', 2)
    for entity_id in infoModule.info.entityList.keys():
        log.plog(str(entity_id) + ' ' + infoModule.info.entityList[entity_id]['primo'], 2)
    for eKey in entKeys:
        #note.  setting frequency to > 0 may be a good idea to remove entities that aren't really in story  
        if infoModule.info.entityList[eKey]['frequency'] != None and infoModule.info.entityList[eKey]['frequency'] >= 0 and infoModule.info.entityList[eKey]['primo'] == 'N':
            freqArray.append([eKey, infoModule.info.entityList[eKey]['frequency'] ])

    freqArray.sort(key=lambda x: x[1], reverse=True)
    for i in range(len(freqArray)):
        totalEntitiesAdded += 1
        if entityForImage == 0:
            entityForImage = eKey
        
        sql = "insert into " + siteDB + "." + storyTable + "_celebs set celeb_id=" + str(freqArray[i][0]) + ", sub_id=" + str(newSubId) + ", primo='" + infoModule.info.entityList[freqArray[i][0]]['primo'] + "'"
        finalEkeys.append(freqArray[i][0])
        
        if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
            mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])   
        if totalEntitiesAdded >= infoModule.info.site['maxEntities']:
            break
            
    ##### end of adding entities
    
    ####  relationships
    #### only add relationships between non-hidden entities
    invisibleTypesQuery = mysql_tools.mysqlQuery("select mptype_id from db_topics.mptypes where visibility='invisible'", infoModule.info.site['dblink'])
    invisibleTypes = ''
    sep = ''
    while True:
        oneType = invisibleTypesQuery.fetch_row(1,1)
        if oneType == ():
            break
        invisibleTypes = invisibleTypes + sep + oneType[0]['mptype_id']
        sep = ','
    entKeys = infoModule.info.entityList.keys()
    inList = ''
    sep = ''
    for eKey in entKeys:
        inList = inList + sep + str(eKey)
        sep = ','
        
    sql = "select celeb_id from db_topics.celebs where mptype_id not in (" + invisibleTypes + ") and celeb_id in (" + inList + ")"
    realEntityQuery = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
    realEntityList = []
    while True:
        realEntity = realEntityQuery.fetch_row(1,1)
        if realEntity == ():
            break
        realEntityList.append(realEntity[0]['celeb_id'])
    
    for i in range(len(realEntityList) - 1):
        entity1 = realEntityList[i]
        # no more than 1
        for j in range(i,len(realEntityList)):
            entity2 = realEntityList[j]
            sql = "select * from db_topics.celebs_related where (cid_1=" + entity1 + " and cid_2 = " + entity2 + ") or (cid_1 = " + entity2 + " and cid_2 = " + entity1 + ")"
            #log.plog(sql, 2)
            existingRelationshipQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
            if existingRelationshipQ.num_rows() > 0:
                #found an existing relationship update it.
                sql = "update db_topics.celebs_related set relevance=relevance+1 where (cid_1=" + entity1 + " and cid_2 = " + entity2 + ") or (cid_1 = " + entity2 + " and cid_2 = " + entity1 + ")"
                #log.plog(sql, 2)
                if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
                    mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])   

            else:
                #put them into the queue as possibly related.  after relevance reaches 20, make a relationship
                sql = "select * from db_topics.pending_relationships where cid_1=" + entity1 + " and cid_2=" + entity2
                pendingRelationshipQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])               
                if pendingRelationshipQ.num_rows() > 0:
                    sql = "update db_topics.pending_relationships set relevance = relevance + 1 where cid_1=" + entity1 + " and cid_2=" + entity2
                    #log.plog(sql, 2)
                    if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
                        mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])   
                    
                sql = "select * from db_topics.pending_relationships where cid_1=" + entity2 + " and cid_2=" + entity1
                pendingRelationship2Q = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])               
                if pendingRelationship2Q.num_rows() > 0:
                    sql = "update db_topics.pending_relationships set relevance = relevance + 1 where cid_1=" + entity2 + " and cid_2=" + entity1
                    #log.plog(sql, 2)
                    if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
                        mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])   
                    
                if pendingRelationshipQ.num_rows() == 0 and pendingRelationship2Q.num_rows() == 0:
                    #no pending relationship, create it
                    sql = "insert into db_topics.pending_relationships set cid_1 = " + entity1 + ", cid_2 = " + entity2 + ", relevance = 1"
                    #log.plog(sql, 2)
                    if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
                        mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])   
                    
                sql = "select * from db_topics.pending_relationships where relevance > 20"
                newRelationshipQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink']) 
                while True:
                    newRelationship = newRelationshipQ.fetch_row(1,1)
                    if newRelationship == ():
                        break
                    #cid_1 should be small
                    newRelationship_cid_1 = int(newRelationship[0]['cid_1'])
                    newRelationship_cid_2 = int(newRelationship[0]['cid_2'])
                    if newRelationship_cid_1 > newRelationship_cid_2:
                        tmpRelationship = newRelationship_cid_1
                        newRelationship_cid_1 = newRelationship_cid_2
                        newRelationship_cid_2 = tmpRelationship	             
                        
                    sql = "insert into db_topics.celebs_related set cid_1=" + str(newRelationship_cid_1) + ", cid_2=" + str(newRelationship_cid_2) + ", relevance=" + newRelationship[0]['relevance']
                    if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
                        mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])   
                    
                if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
                    mysql_tools.mysqlQuery("delete from db_topics.pending_relationships where relevance > 20", infoModule.info.site['dblink'])   
            
         

    # for stories that match conditions, set site_x = true so that sub-sites will recognize this story
    siteIds = []
    siteSourcesQ = mysql_tools.mysqlQuery("select * from " + siteDB + ".sites_sources where source_id=" + str(infoModule.info.source['source_id']), infoModule.info.site['dblink'])
    while True:
        siteSources = siteSourcesQ.fetch_row(1,1)
        if siteSources == ():
            break
        if siteSources[0]['site_id'] not in siteIds:
            siteIds.append(siteSources[0]['site_id'])
            log.plog('appending ' + str(siteSources[0]['site_id']) + ' to site IDs from source ' + str(infoModule.info.source['source_id']), 2)

    for eKey in finalEkeys:
        # only entities in master 15 are used for consideration in sub sites
        sql = "select sub_id from " + siteDB + "." + storyTable + "_celebs where celeb_id=" + str(eKey) + " and sub_id=" + str(newSubId)
        log.plog(sql, 2)
        inStoryQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
        if inStoryQ.num_rows() == 1:
            sql = "select * from " + siteDB + ".sites_celebs where celeb_id=" + str(eKey)
            #log.plog(sql, 2)
            siteEntitiesQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
            while True:
                siteEntity = siteEntitiesQ.fetch_row(1,1)
                if siteEntity == ():
                    break
                if siteEntity[0]['site_id'] not in siteIds:
                    siteIds.append(siteEntity[0]['site_id'])
                    log.plog('appending ' + str(siteEntity[0]['site_id']) + ' to site IDs from entity ' + str(eKey), 2)

    #have now created a list of sites.  Commit
    sitesQueryString = ''
    sep = ''
    for i in range(len(siteIds)):
        sitesQueryString = sitesQueryString + sep + "site_" + str(siteIds[i]) + " = true"
        sep = ','
        # for each site, check to see if the table newsroom_sites_x or subs_sites_x exists where x is site_id
        sql = "SHOW TABLES from " + siteDB + " like '" + storyTable + "_sites_" + str(siteIds[i] + "'")
        linkTableExists = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
        if linkTableExists == False:
            # if it doesn't exist, create it.
            sql = "create table " + siteDB + "." + storyTable + "_sites_" + str(siteIds[i]) + " (sub_id int primary key) engine=innoDB"
            if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
                mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])   
            
        # insert sub_id into subs_sites_x
        sql = "INSERT INTO "+ siteDB + "." + storyTable + "_sites_" + str(siteIds[i]) + " set sub_id=" + str(newSubId)
        log.plog(sql, 3)
        if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
            mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink']) 
              
    if len(siteIds) > 0:
        sql = "update " + siteDB + "." + storyTable + " set " + sitesQueryString + " where sub_id=" + str(newSubId)
        log.plog(sql, 2)
        if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
            mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])   
    
    
    ##### add images #####
    log.plog('AS: adding images')
    if 'maxSize' not in infoModule.info.page:
        infoModule.info.page['maxSize'] = 0
    if 'imageMinSize' not in infoModule.info.site:
        infoModule.info.site['imageMinSize'] = 99999
    if 'overrideImageMinSize' not in infoModule.info.site:
        infoModule.info.site['overrideImageMinSize'] = False

    log.plog("AS: maxSize: " + str(infoModule.info.page['maxSize']) + "(" + str(infoModule.info.site['imageMinSize']) + ") overrideImageMinSize: " + str(infoModule.info.site['overrideImageMinSize']), 2)
    storyImageAdded = False
    if (infoModule.info.site['overrideImageMinSize'] == True and infoModule.info.page['maxSize'] > 0) or infoModule.info.page['maxSize'] > infoModule.info.site['imageMinSize']:
        if 'largestImage' in infoModule.info.page:
            log.plog("AS: largest image: " + infoModule.info.page['largestImage'], 2)
            if 'imageSource' not in infoModule.info.page or infoModule.info.page['imageSource'] == None:
                infoModule.info.page['imageSource'] = ''
                log.plog('AS: imageSource not in infoModule.')
            else:
                log.plog('AS: imageSource IS IN infoModule.')
            infoModule.info.page['imageSource'] = infoModule.info.page['imageSource'].replace("'", "\\'")
            sql = "insert into " + siteDB + ".images set credit='" + infoModule.info.page['imageSource'] + "', originalUrl='" + infoModule.info.page['imageSource'] + "'"
            log.plog('AS: ' + sql)
            if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
                log.plog('AS: debug_mode NOT on')
                mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
                #new image id
                iid = infoModule.info.site['dblink'].insert_id()
                sql = "update " + siteDB + "." + storyTable + " set image_id=" + str(iid) + ", imgsrc='U' where sub_id=" + str(newSubId)
                log.plog("AS: " + sql)
                mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
                escImage = urllib.quote(infoModule.info.page['largestImage'])
                log.plog('AS: escImage: ' + escImage, 2)
                if 'featured_source' in infoModule.info.source and infoModule.info.source['featured_source'] == '1':
                    log.plog('AS: ' + infoModule.info.site['imageReceiver'] + '/imageFetcher.php?image_id=' + str(iid) + '&type=feed&topPick=true&url=' + escImage, 2)
                    imageFetchingSocket = urllib.urlopen(infoModule.info.site['imageReceiver'] + '/imageFetcher.php?image_id=' + str(iid) + '&type=feed&topPick=true&url=' + escImage)
                else:
                    log.plog('AS: ' + infoModule.info.site['imageReceiver'] + '/imageFetcher.php?image_id=' + str(iid) + '&type=feed&url=' + escImage, 2)
                    imageFetchingSocket = urllib.urlopen(infoModule.info.site['imageReceiver'] + '/imageFetcher.php?image_id=' + str(iid) + '&type=feed&url=' + escImage)
                imageInsert = imageFetchingSocket.read()
                log.plog('AS: ' + str(imageInsert), 2)

                #check that image insert was successful
                sql = "select * from " + siteDB + ".images where image_id=" + str(iid)
                log.plog('AS: ' + sql)
                chkImageQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
                chkImage = chkImageQ.fetch_row(1,1)
                if chkImage == () or chkImage[0]['sxsize'] == '0':
                    log.plog('image import error!', 4)
                    mysql_tools.mysqlQuery("delete from " + siteDB + ".images where image_id=" + str(iid), infoModule.info.site['dblink'])
                    storyImageAdded = False
                    ## if featured source but no image, change top pick flag to false
                    if 'featured_source' in infoModule.info.source and infoModule.info.source['featured_source'] == '1':
                        log.plog('featured source image import error', 4)
                        mysql_tools.mysqlQuery("update " + siteDB + "." + storyTable + " set topPick=false where sub_id=" + str(newSubId), infoModule.info.site['dblink'])
                else:
                    log.plog('AS: storyimageadded = true')
                    storyImageAdded = True
            else:
                log.plog('AS: debug_mod on')
    if storyImageAdded == False:
        #if no story found and auto_featured source, do not feature
        if 'featured_source' in infoModule.info.source and infoModule.info.source['featured_source'] == '1':
            sql = "update " + siteDB + "." + storyTable + " set topPick=false where sub_id=" + str(newSubId)
            if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
                mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])   
        #no story image added, so pick top entity image    
        entityImageQ = mysql_tools.mysqlQuery("select image_id from db_topics.celebs_images where celeb_id=" + str(entityForImage) + " order by seq limit 1", infoModule.info.site['dblink'])
        entityImage = entityImageQ.fetch_row(1,1)
        if entityImage == ():
            #no image
            log.plog("error could not get a result for select image_id from db_topics.celebs_images where celeb_id=" + str(entityForImage) + " order by seq limit 1", 3)
            sql = "update " + siteDB + "." + storyTable + " set imgsrc='S', image_id=0 where sub_id=" + str(newSubId)
        else:
            iid = entityImage[0]['image_id']      
            sql = "update " + siteDB + "." + storyTable + " set imgsrc='S', image_id=" + iid + " where sub_id=" + str(newSubId)
        if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
            mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])   
            
    highlightedBody = highlightEntities.highlightEntitiesFromList(infoModule.info.page['outline'], infoModule.info.entityList, True)
    highlightedBody = highlightedBody.replace("'", "\\'")
    sql = "update " + siteDB + "." + storyTable + " set outline='" + highlightedBody + "' where sub_id=" + str(newSubId)
    if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
        mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])   
        
    ### add to atom queue if in subs story table
    if storyTable == 'subs':
        sql = "insert into " + siteDB + ".atom_queue set placed=now(), sub_id=" + str(newSubId)
        if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
            mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])   
    

    ### YAY! Done
    return(newSubId)
示例#3
0
def setPrimo():
    ###################################################################
    ##func: setPrimo
    ##param: none
    ##desc: takes no params.  Works on infoModule.info.page (title and
    ##      outline), infoModule.info.entityList and infoModule.info.source.
    ##      First bumps each entity's 'frequency' once per [celeb url=...]
    ##      tag that highlighting leaves in title + outline, then writes a
    ##      primo rank ('Y', '2', '3', '4') into the chosen entities:
    ##        1. the entity tied to the source (source['celeb_id'])
    ##        2. entities positioned inside the title, in position order
    ##        3. entities with frequency > 1, most frequent first
    ##        4. if nothing was ranked at all, the first positioned entity
    ##      Steps 2-4 only rank entities whose 'visibility' (as reported by
    ##      entityLib.entityLibrary) is not 'invisible'.
    ##      Logs and returns early when the outline, the title or the
    ##      entity list is missing or empty.
    ##ret: None
    ##auth: esr
    ##################################################################

    if 'outline' not in infoModule.info.page or infoModule.info.page['outline'] == '':
        log.plog('no outline set before setPrimo called', 5)
        return

    if 'title' not in infoModule.info.page or infoModule.info.page['title'] == '':
        log.plog('no title set before setPrimo called', 5)
        return

    if len(infoModule.info.entityList) == 0:
        log.plog('no entities set before setPrimo called', 5)
        return

    #highlight text as a way to count the frequency
    highlightedText = highlightEntities.highlightEntitiesFromList(infoModule.info.page['title'] + ' ' + infoModule.info.page['outline'], infoModule.info.entityList)
    entityList = infoModule.info.entityList
    # raw string: '\[' and '\]' are not valid string escapes
    tagPattern = re.compile(r'\[celeb url=(.+?)\]')
    while True:
        taggedEntity = tagPattern.search(highlightedText)
        if taggedEntity is None:
            break
        # drop the tag just counted so the next search moves on to the next one
        highlightedText = highlightedText.replace(taggedEntity.group(0), '', 1)
        # the url is read back out of text derived from the page title/outline,
        # so escape backslashes and quotes before splicing it into the query
        lookupUrl = taggedEntity.group(1).replace('\\', '\\\\').replace("'", "\\'")
        sql = "select celeb_id from db_topics.celebs where lookupUrl='" + lookupUrl + "'"
        entityByLookupQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
        while True:
            entityByLookup = entityByLookupQ.fetch_row(1, 1)
            if entityByLookup == ():
                break
            entityList[entityByLookup[0]['celeb_id']]['frequency'] += 1

    # now frequency is set, first two are based on position, next two based on frequency
    primoTypes = ['Y', '2', '3', '4', 'N']
    primoTypeCursor = 0

    #if source is associated with entity, it's auto-set to primo
    sourceEntityKey = None
    if 'celeb_id' in infoModule.info.source and infoModule.info.source['celeb_id'] != '' and int(infoModule.info.source['celeb_id']) > 0:
        sourceEntityKey = infoModule.info.source['celeb_id']
        entityList[sourceEntityKey]['primo'] = primoTypes[primoTypeCursor]
        primoTypeCursor += 1

    # collect [key, position] and [key, frequency] pairs in a single pass
    posArray = []
    freqArray = []
    for eKey in entityList.keys():
        entity = entityList[eKey]
        if entity['position'] is not None:
            posArray.append([eKey, entity['position']])
        if entity['frequency'] is not None and entity['frequency'] > 1:
            freqArray.append([eKey, entity['frequency']])

    posArray.sort(key=lambda x: x[1])
    freqArray.sort(key=lambda x: x[1], reverse=True)

    titleLen = len(infoModule.info.page['title'])
    #primo set by position in title
    for eKey, position in posArray:
        if primoTypeCursor >= 4 or position > titleLen:
            break
        # the source entity already holds the top rank; ranking it again here
        # would demote it and leave no entity marked 'Y'
        if sourceEntityKey is not None and eKey == sourceEntityKey:
            continue
        ## only non-hidden entities can be primo
        if entityLib.entityLibrary(eKey, 'visibility') != 'invisible':
            log.plog("entity %s found in title, setting to primo %s" % (eKey, primoTypes[primoTypeCursor]), 3)
            entityList[eKey]['primo'] = primoTypes[primoTypeCursor]
            primoTypeCursor += 1

    #primo set by frequency
    for eKey, _frequency in freqArray:
        if primoTypeCursor >= 4:
            break
        # skip anything that was already ranked above
        if entityList[eKey]['primo'] != 'N':
            continue
        ## only non-hidden entities can be primo
        if entityLib.entityLibrary(eKey, 'visibility') != 'invisible':
            log.plog("entity %s has high frequency, setting to primo %s" % (eKey, primoTypes[primoTypeCursor]), 3)
            entityList[eKey]['primo'] = primoTypes[primoTypeCursor]
            primoTypeCursor += 1

    #if nothing found, go to first position and set it to primo
    if primoTypeCursor == 0:
        for eKey, _position in posArray:
            ## only non-hidden entities can be primo
            if entityLib.entityLibrary(eKey, 'visibility') != 'invisible':
                log.plog("entity %s found first in story, setting to primo %s" % (eKey, primoTypes[primoTypeCursor]), 3)
                entityList[eKey]['primo'] = primoTypes[primoTypeCursor]
                break
import highlightEntities
import sys

#link = mysql_tools.mysqlConnect('192.168.0.29', 'rw_failover', 'f@ilfa1l')
# Scratch driver: connect to the local database, run the entity highlighter
# over one hard-coded story and print what it returns.
link = mysql_tools.mysqlConnect('127.0.0.1', 'root', '')
if link == False:
    # NOTE(review): assumes mysqlConnect returns False when it cannot connect
    print("no connection")
    # a failed connection is an error, so exit with a non-zero status
    sys.exit(1)

infoModule.info.site['dblink'] = link
infoModule.info.site['log_priority_threshold'] = 2

story = 'there has been much talk at gawker media about 2 much the new playstation 2 game called mr. poo poo playstation 2head 1423 is my number'

relatedArray = ['86464', '5754224']
output = highlightEntities.highlightEntitiesFromList(story, relatedArray, True)
print(output)
# stop here: the statements that follow this exit in the file do not run
sys.exit(0)
#if len(sys.argv) > 1 and int(sys.argv[1]) > 0:
    #get specific sub_id
    #print "looking up sub_id: " + sys.argv[1]
    #randStoriesQ = mysql_tools.mysqlQuery("select sub_id, outline, title from db_celebrifi.subs where sub_id=" + sys.argv[1] + " limit 20", infoModule.info.site['dblink'])
#else:
    #randStoriesQ = mysql_tools.mysqlQuery("select sub_id, outline, title from db_celebrifi.subs order by rand() limit 20", infoModule.info.site['dblink'])
randStoriesQ = mysql_tools.mysqlQuery("select 19 as sub_id, 'Dallas Cowboy WR Patrick Craytons agent said that he will ask the Dallas Cowboys for the outright release of his client prior to the Saturday 53 man roster deadline according to the Dallas Morning News. Patrick Craytons agent told the newspaper, It seems he doesnt figure into their plans so its not logical to be on the roster until Saturday. We dont see the use in prolonging it....' as outline, 'Dallas Cowboys WR asks for release' as title", infoModule.info.site['dblink'])
while True:
    randStory = randStoriesQ.fetch_row(1,1)
    if randStory == ():
        break
    if randStory[0]['outline'] == None:
        print "error, outline is empty " + randStory[0]['sub_id']