def setPrimo():
    ###################################################################
    ##func: setPrimo
    ##param: none
    ##desc: takes no params. Reads infoModule.info.page ('title' and
    ##      'outline'), infoModule.info.source and infoModule.info.entityList
    ##      and ranks the entities for primo position. Results are written
    ##      in place on the entityList entries ('frequency' and 'primo').
    ##      Primo values are handed out in the order 'Y', '2', '3', '4':
    ##        1. the entity associated with the source (if any)
    ##        2. visible entities positioned inside the title
    ##        3. visible, still unranked entities with frequency > 1,
    ##           most frequent first
    ##        4. if nothing was ranked at all, the first visible entity
    ##           by position
    ##ret: None
    ##auth: esr
    ##################################################################
    page = infoModule.info.page
    source = infoModule.info.source
    entityList = infoModule.info.entityList

    if 'outline' not in page or page['outline'] == '':
        log.plog('no outline set before setPrimo called', 5)
        return
    if 'title' not in page or page['title'] == '':
        log.plog('no title set before setPrimo called', 5)
        return
    if len(entityList) == 0:
        log.plog('no entities set before setPrimo called', 5)
        return

    #highlight text as a way to count the frequency
    highlightedText = highlightEntities.highlightEntitiesFromList(
        page['title'] + ' ' + page['outline'], entityList)
    tagPattern = re.compile(r'\[celeb url=(.+?)\]')
    while True:
        taggedEntity = tagPattern.search(highlightedText)
        if taggedEntity is None:
            break
        # consume this tag so the next search finds the following one
        highlightedText = highlightedText.replace(taggedEntity.group(0), '', 1)
        # same quote escaping the rest of the file uses for SQL literals
        lookupUrl = taggedEntity.group(1).replace("'", "\\'")
        sql = "select celeb_id from db_topics.celebs where lookupUrl='" + lookupUrl + "'"
        entityByLookupQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
        while True:
            entityByLookup = entityByLookupQ.fetch_row(1, 1)
            if entityByLookup == ():
                break
            entity = entityList[entityByLookup[0]['celeb_id']]
            # a frequency of None (treated as "unset" further down) counts as 0
            entity['frequency'] = (entity['frequency'] or 0) + 1

    # now frequency is set, first two are based on position, next two based on frequency
    primoTypes = ['Y', '2', '3', '4', 'N']
    primoTypeCursor = 0

    #if source is associated with entity, it's auto-set to primo
    sourceEntityKey = None
    if 'celeb_id' in source and source['celeb_id'] != '' and int(source['celeb_id']) > 0:
        sourceEntityKey = source['celeb_id']
        entityList[sourceEntityKey]['primo'] = primoTypes[primoTypeCursor]
        primoTypeCursor += 1

    posArray = [[eKey, entityList[eKey]['position']]
                for eKey in entityList.keys()
                if entityList[eKey]['position'] is not None]
    posArray.sort(key=lambda x: x[1])

    freqArray = [[eKey, entityList[eKey]['frequency']]
                 for eKey in entityList.keys()
                 if entityList[eKey]['frequency'] is not None
                 and entityList[eKey]['frequency'] > 1]
    freqArray.sort(key=lambda x: x[1], reverse=True)

    titleLen = len(page['title'])

    #primo set by position in title
    for eKey, position in posArray:
        if primoTypeCursor >= 4 or position > titleLen:
            break
        if eKey == sourceEntityKey:
            # already ranked from the source; re-ranking it here would
            # demote it and leave the story without a 'Y' entity
            continue
        ## only non-hidden entities can be primo
        if entityLib.entityLibrary(eKey, 'visibility') != 'invisible':
            log.plog("entity %s found in title, setting to primo %s" % (eKey, primoTypes[primoTypeCursor]), 3)
            entityList[eKey]['primo'] = primoTypes[primoTypeCursor]
            primoTypeCursor += 1

    #primo set by frequency
    for eKey, _frequency in freqArray:
        if primoTypeCursor >= 4:
            break
        if entityList[eKey]['primo'] != 'N':
            continue
        ## only non-hidden entities can be primo
        if entityLib.entityLibrary(eKey, 'visibility') != 'invisible':
            log.plog("entity %s has high frequency, setting to primo %s" % (eKey, primoTypes[primoTypeCursor]), 3)
            entityList[eKey]['primo'] = primoTypes[primoTypeCursor]
            primoTypeCursor += 1

    #if nothing found, go to first position and set it to primo
    if primoTypeCursor == 0:
        for eKey, _position in posArray:
            ## only non-hidden entities can be primo
            if entityLib.entityLibrary(eKey, 'visibility') != 'invisible':
                log.plog("entity %s found first in story, setting to primo %s" % (eKey, primoTypes[primoTypeCursor]), 3)
                entityList[eKey]['primo'] = primoTypes[primoTypeCursor]
                primoTypeCursor += 1
                break
def addStory():
    ###################################################################
    ##func: addStory
    ##param: none
    ##desc: takes no params. Stores the story described by
    ##      infoModule.info.page / .source / .entityList in the site
    ##      database (infoModule.info.site['database']):
    ##        - creates the source row for unknown ("blind") sources
    ##        - inserts the story into subs or newsroom
    ##        - links entities to the story and updates entity stats
    ##        - updates relationships between visible entities
    ##        - flags the story for matching sub-sites
    ##        - attaches a story image or falls back to an entity image
    ##      When site['debug_mode'] is set, guarded write queries are
    ##      skipped and the story id is 0. The video thumbnail insert and
    ##      all selects still run in debug mode.
    ##ret: new story id (0 in debug mode); False when the title is
    ##     missing, the url has no http(s) scheme or the story insert
    ##     fails. Calls os._exit(0) when the source title cannot be
    ##     deduced, the source id is 0 or there are no entities.
    ###################################################################
    page = infoModule.info.page
    site = infoModule.info.site
    source = infoModule.info.source
    entityList = infoModule.info.entityList

    siteDB = site['database']
    # the two flags are kept separate on purpose: they only disagree for
    # debug_mode values that are neither True nor False
    debugMode = 'debug_mode' in site and site['debug_mode'] == True
    writesEnabled = 'debug_mode' not in site or site['debug_mode'] == False

    def runQuery(sql):
        # always executed, debug mode or not
        return mysql_tools.mysqlQuery(sql, site['dblink'])

    def runWrite(sql):
        # executed only when debug mode is off
        if writesEnabled:
            mysql_tools.mysqlQuery(sql, site['dblink'])

    def sqlQuote(value):
        # quote escaping used throughout this file for SQL string literals
        return value.replace("'", "\\'")

    def fetchUrl(url):
        # read the whole response and always release the socket
        fetchSocket = urllib.urlopen(url)
        try:
            return fetchSocket.read()
        finally:
            fetchSocket.close()

    if 'title' not in page or page['title'] == '':
        log.plog('addStory called without title', 5)
        return False

    if 'publish_immediately' in source and source['publish_immediately'] == '1':
        #if the feed always goes live, as in celeb blogs, then story table is subs
        storyTable = "subs"
        storyScore = 1
    else:
        storyTable = "newsroom"
        storyScore = 0
    storyStatus = "A"
    storyUser = -1

    isFeaturedSource = 'featured_source' in source and source['featured_source'] == '1'
    if isFeaturedSource:
        topPick = 'true'
        site['overrideImageMinSize'] = True
    else:
        topPick = 'false'

    vtid = 0
    if 'vthumb' in page and page['vthumb'] != '':
        #there's a video, go get thumb
        # NOTE(review): this insert is not guarded by debug_mode
        runQuery("insert into " + siteDB + ".images set credit=''")
        # get the image id for the subs table
        vtid = site['dblink'].insert_id()
        escImage = urllib.quote(page['vthumb'])
        imageInsert = fetchUrl(site['imageReceiver'] + '/imageFetcher.php?image_id=' + str(vtid) + '&type=video&url=' + escImage)
        log.plog(imageInsert, 2)

    # strip multi-byte chars, log differences
    outline_before_replace = page['outline']
    page['outline'] = encoding.convertToAscii(page['outline'])
    page['title'] = encoding.convertToAscii(page['title'])
    if outline_before_replace != page['outline']:
        log.plog('multi-byte char(s) replaced', 3)

    if 'source_id' not in source:
        source['source_id'] = '0'

    #sanity checks
    #url must have http:// or https://
    if not page['url'].startswith(('http://', 'https://')):
        log.plog("relative url, no host! " + page['url'], 2)
        return False

    #mysql safe
    outline = sqlQuote(page['outline'])
    title = sqlQuote(page['title']).strip()
    url = sqlQuote(page['url'])
    if 'author' in page:
        author = sqlQuote(page['author'])
    else:
        author = ''

    if 'vlink' not in page or page['vlink'] is None:
        vlink = ''
    else:
        vlink = page['vlink']

    if 'sourceType' not in page:
        sourceType = ''
    else:
        sourceType = page['sourceType']

    if 'promoter' in page:
        promoter = page['promoter']
        if promoter == '':
            promoter = -1
    else:
        promoter = -1

    # do something special with blind sources
    if str(source['source_id']) == '0':
        source['title'] = deduceSourceTitle(page['url'])
        if source['title'] == '':
            log.plog('ERROR could not deduce unknown source title', 5)
            os._exit(0)

        log.plog('splitting url into parts for url_regex: %s' % page['url'], 2)
        myHost = urlparse.urlparse(page['url'])[1]
        source['url_regex'] = myHost
        log.plog('setting url_regex for new source to ' + myHost, 2)
        sql = "select * from " + siteDB + ".sources where url_regex='" + sqlQuote(source['url_regex']) + "'"
        log.plog(sql, 2)
        checkSourceQ = runQuery(sql)
        if checkSourceQ.num_rows() == 0:
            # keep only the part of the deduced title before the first colon
            matches = re.search('^(.*?):', source['title'])
            if matches is not None:
                source['title'] = matches.group(1)
            log.plog('adding new source', 3)
            sql = "insert into " + siteDB + ".sources set title='" + sqlQuote(source['title']) + "', url_regex='" + sqlQuote(source['url_regex']) + "', site_url='http://" + sqlQuote(myHost) + "', machine_generated = true"
            runWrite(sql)
            log.plog(sql, 2)
            #associate new source with new sub
            newSourceId = site['dblink'].insert_id()
            source['source_id'] = str(newSourceId)
        else:
            foundSourceID = checkSourceQ.fetch_row(1, 1)
            source['source_id'] = foundSourceID[0]['source_id']

    # Compare as ints. Only enforced when writes are enabled because in
    # debug mode no source row is created, so no real id can be expected.
    if writesEnabled and int(source['source_id']) == 0:
        log.plog('ERROR source_id is 0', 5)
        os._exit(0)
    if len(entityList) == 0:
        log.plog('ERROR no entities in story', 5)
        os._exit(0)

    sourceId = str(source['source_id'])
    sql = "insert into " + siteDB + "." + storyTable + " set user_id='" + str(storyUser) + "', sdatetime=now(), firstPosted=now(), title='" + title + "', outline='" + outline + "',url='" + url + "', status='" + storyStatus + "', score=" + str(storyScore) + ", vlink='" + sqlQuote(vlink) + "', vthumb=" + str(vtid) + ", source_id=" + sourceId + ", sourceType='" + sqlQuote(sourceType) + "', celebrifier='" + str(promoter) + "', topPick=" + topPick + ", title_source='" + sourceId + " " + title + "', author='" + author + "'"
    log.plog(sql, 2)
    if debugMode:
        log.plog("Debug mode. Not inserting: " + sql, 2)
        newSubId = 0
    else:
        res = runQuery(sql)
        if res == False:
            return False
        newSubId = site['dblink'].insert_id()
        if newSubId == 0:
            log.plog('error inserting new story', 5)
            return False

    #record the last story entered for debugging purposes
    runWrite("update " + siteDB + ".sources set last_story_date=now(), last_story_id=" + str(newSubId) + " where source_id=" + sourceId)

    if storyTable == 'subs' and len(vlink) > 0 and 'videoStory' in page and page['videoStory'] != True:
        # only logged: the call that created the video story is disabled
        log.plog("creating video-only story", 2)

    runWrite("update " + siteDB + ".relatedSubs set sub_id2 = " + str(newSubId) + " where sub_id2 = -1 and table2='" + storyTable + "'")

    # best effort: a missing or malformed vurls list must not stop the
    # story from being added, but the reason is logged
    try:
        vurls = site.get('vurls', [])
        for i in range(len(vurls)):
            #if promoter > 0, then this is from an outbound link
            linkPromoter = vurls[i]['promoter']
            log.plog("PROMOTER: " + str(linkPromoter), 2)
            if int(linkPromoter) > 0:
                runWrite("insert into " + siteDB + ".linkHistory set sub_id=" + str(newSubId) + ", linker_id=-1, feedIdx=" + str(linkPromoter) + ", linkerURL='" + sqlQuote(vurls[i]['submittingURL']) + "', linkedOn=now(), userVote=false")
    except Exception as err:
        log.plog('could not record link history from vurls: ' + str(err), 3)

    #complete any linkhistory records
    runWrite("update " + siteDB + ".linkHistory set linker_id=" + str(newSubId) + " where linker_id=-1 and feedIdx=" + sourceId + " and linkerURL='" + url + "'")

    ## add entities
    totalEntitiesAdded = 0
    entityForImage = 0
    entityForImageRank = {'Y': 1, '2': 2, '3': 3, '4': 4}
    lastEntityForImageRank = 5
    # weighted stats depend on how primo the entity is
    storiesWeightedByPrimo = {'Y': 10, '2': 7, '3': 5, '4': 3}
    #finalEkeys holds the list of entities that are ACTUALLY in the story
    finalEkeys = []
    entKeys = list(entityList.keys())

    # NOTE(review): imageMinSize defaults to 0 here, so any image counts
    # when the site does not configure a minimum -- confirm that is wanted
    if 'overrideImageMinSize' not in site:
        site['overrideImageMinSize'] = False
    if 'imageMinSize' not in site:
        site['imageMinSize'] = 0
    if 'maxSize' not in page:
        page['maxSize'] = 0
    # one rule decides both "story has a picture" (stats) and "fetch the
    # story image" (image section below)
    storyHasImage = (site['overrideImageMinSize'] == True and page['maxSize'] > 0) or page['maxSize'] > site['imageMinSize']
    picPoints = 1 if storyHasImage else 0
    videoPoints = 1 if vlink != '' else 0

    #break up stats time by hourly and daily blocks
    now = time.time()
    hourBlock = int(math.floor(now / 3600))
    dayBlock = int(math.floor(now / 86400))

    for eKey in entKeys:
        primo = entityList[eKey]['primo']
        if primo != 'N':
            if entityForImageRank[primo] < lastEntityForImageRank:
                entityForImage = eKey
                lastEntityForImageRank = entityForImageRank[primo]
            totalEntitiesAdded += 1
            sql = "insert into " + siteDB + "." + storyTable + "_celebs set celeb_id=" + str(eKey) + ", sub_id=" + str(newSubId) + ", primo='" + primo + "'"
            finalEkeys.append(eKey)
            log.plog(sql, 2)
            runWrite(sql)

        ## stats
        statsExistsQ = runQuery("select celeb_id from " + siteDB + ".celebStats where celeb_id=" + str(eKey))
        if statsExistsQ.num_rows() > 0:
            sql = "update " + siteDB + ".celebStats set photos = photos + " + str(picPoints) + ", videos = videos + " + str(videoPoints) + ", stories = stories + 1 where celeb_id = " + str(eKey)
        else:
            # a new row starts from plain values; there is nothing to add to yet
            sql = "insert into " + siteDB + ".celebStats set photos = " + str(picPoints) + ", videos = " + str(videoPoints) + ", stories = 1, celeb_id = " + str(eKey)
        runWrite(sql)

        storiesWeighted = storiesWeightedByPrimo.get(primo, 1)

        if storyTable == 'subs':
            #don't count stories towards stats if they are in newsroom
            hourlyExistsQ = runQuery("select celeb_id from " + siteDB + ".celebHourlyStats where celeb_id=" + str(eKey) + " and hourBlock = " + str(hourBlock))
            if hourlyExistsQ.num_rows() > 0:
                sql = "update " + siteDB + ".celebHourlyStats set photos = photos + " + str(picPoints) + ", videos = videos + " + str(videoPoints) + ", stories = stories + 1, storiesWeighted = storiesWeighted + " + str(storiesWeighted) + " where celeb_id = " + str(eKey) + " and hourBlock = " + str(hourBlock)
            else:
                sql = "insert into " + siteDB + ".celebHourlyStats set photos = " + str(picPoints) + ", videos = " + str(videoPoints) + ", stories = 1, storiesWeighted = " + str(storiesWeighted) + ", celeb_id = " + str(eKey) + ", hourBlock = " + str(hourBlock)
            runWrite(sql)

            # now do stats by day
            dayStatsExistsQ = runQuery("select celeb_id from " + siteDB + ".celebDailyStats where celeb_id=" + str(eKey) + " and dayBlock = " + str(dayBlock))
            if dayStatsExistsQ.num_rows() > 0:
                sql = "update " + siteDB + ".celebDailyStats set stories = stories + 1, storiesWeighted = storiesWeighted + " + str(storiesWeighted) + " where celeb_id = " + str(eKey) + " and dayBlock = " + str(dayBlock)
            else:
                sql = "insert into " + siteDB + ".celebDailyStats set stories = 1, storiesWeighted = " + str(storiesWeighted) + ", celeb_id = " + str(eKey) + ", dayBlock = " + str(dayBlock)
            runWrite(sql)

        #update celeb popularity
        runWrite("update db_topics.celebs set popularity=popularity+1 where celeb_id=" + str(eKey))

    log.plog('Entities:', 2)
    for entity_id in entityList.keys():
        log.plog(str(entity_id) + ' ' + entityList[entity_id]['primo'], 2)

    #note. setting frequency to > 0 may be a good idea to remove entities that aren't really in story
    freqArray = [[eKey, entityList[eKey]['frequency']]
                 for eKey in entKeys
                 if entityList[eKey]['frequency'] is not None
                 and entityList[eKey]['frequency'] >= 0
                 and entityList[eKey]['primo'] == 'N']
    freqArray.sort(key=lambda x: x[1], reverse=True)
    # non-primo entities are added most frequent first until maxEntities is reached
    for freqKey, _frequency in freqArray:
        totalEntitiesAdded += 1
        if entityForImage == 0:
            entityForImage = freqKey
        finalEkeys.append(freqKey)
        runWrite("insert into " + siteDB + "." + storyTable + "_celebs set celeb_id=" + str(freqKey) + ", sub_id=" + str(newSubId) + ", primo='" + entityList[freqKey]['primo'] + "'")
        if totalEntitiesAdded >= site['maxEntities']:
            break
    ##### end of adding entities

    #### relationships
    #### only add relationships between non-hidden entities
    invisibleTypesQuery = runQuery("select mptype_id from db_topics.mptypes where visibility='invisible'")
    invisibleTypeIds = []
    while True:
        oneType = invisibleTypesQuery.fetch_row(1, 1)
        if oneType == ():
            break
        invisibleTypeIds.append(str(oneType[0]['mptype_id']))

    inList = ','.join([str(eKey) for eKey in entityList.keys()])
    sql = "select celeb_id from db_topics.celebs where "
    if invisibleTypeIds:
        # "not in ()" is not valid SQL, so the filter is only added when needed
        sql += "mptype_id not in (" + ','.join(invisibleTypeIds) + ") and "
    sql += "celeb_id in (" + inList + ")"
    realEntityQuery = runQuery(sql)
    realEntityList = []
    while True:
        realEntity = realEntityQuery.fetch_row(1, 1)
        if realEntity == ():
            break
        realEntityList.append(str(realEntity[0]['celeb_id']))

    for i in range(len(realEntityList) - 1):
        entity1 = realEntityList[i]
        # start at i + 1 so an entity is never related to itself
        for j in range(i + 1, len(realEntityList)):
            entity2 = realEntityList[j]
            eitherOrder = "(cid_1=" + entity1 + " and cid_2 = " + entity2 + ") or (cid_1 = " + entity2 + " and cid_2 = " + entity1 + ")"
            existingRelationshipQ = runQuery("select * from db_topics.celebs_related where " + eitherOrder)
            if existingRelationshipQ.num_rows() > 0:
                #found an existing relationship update it.
                runWrite("update db_topics.celebs_related set relevance=relevance+1 where " + eitherOrder)
                continue

            #put them into the queue as possibly related. after relevance reaches 20, make a relationship
            forward = "cid_1=" + entity1 + " and cid_2=" + entity2
            backward = "cid_1=" + entity2 + " and cid_2=" + entity1
            pendingRelationshipQ = runQuery("select * from db_topics.pending_relationships where " + forward)
            if pendingRelationshipQ.num_rows() > 0:
                runWrite("update db_topics.pending_relationships set relevance = relevance + 1 where " + forward)
            pendingRelationship2Q = runQuery("select * from db_topics.pending_relationships where " + backward)
            if pendingRelationship2Q.num_rows() > 0:
                runWrite("update db_topics.pending_relationships set relevance = relevance + 1 where " + backward)
            if pendingRelationshipQ.num_rows() == 0 and pendingRelationship2Q.num_rows() == 0:
                #no pending relationship, create it
                runWrite("insert into db_topics.pending_relationships set cid_1 = " + entity1 + ", cid_2 = " + entity2 + ", relevance = 1")

    # promote pending relationships that have become relevant enough
    newRelationshipQ = runQuery("select * from db_topics.pending_relationships where relevance > 20")
    while True:
        newRelationship = newRelationshipQ.fetch_row(1, 1)
        if newRelationship == ():
            break
        #cid_1 should be small
        cid_1 = int(newRelationship[0]['cid_1'])
        cid_2 = int(newRelationship[0]['cid_2'])
        if cid_1 > cid_2:
            cid_1, cid_2 = cid_2, cid_1
        runWrite("insert into db_topics.celebs_related set cid_1=" + str(cid_1) + ", cid_2=" + str(cid_2) + ", relevance=" + str(newRelationship[0]['relevance']))
    runWrite("delete from db_topics.pending_relationships where relevance > 20")

    # for stories that match conditions, set site_x = true so that sub-sites will recognize this story
    siteIds = []
    siteSourcesQ = runQuery("select * from " + siteDB + ".sites_sources where source_id=" + sourceId)
    while True:
        siteSources = siteSourcesQ.fetch_row(1, 1)
        if siteSources == ():
            break
        if siteSources[0]['site_id'] not in siteIds:
            siteIds.append(siteSources[0]['site_id'])
            log.plog('appending ' + str(siteSources[0]['site_id']) + ' to site IDs from source ' + sourceId, 2)

    for eKey in finalEkeys:
        # only entities actually stored with the story are considered for sub sites
        sql = "select sub_id from " + siteDB + "." + storyTable + "_celebs where celeb_id=" + str(eKey) + " and sub_id=" + str(newSubId)
        log.plog(sql, 2)
        inStoryQ = runQuery(sql)
        if inStoryQ.num_rows() != 1:
            continue
        siteEntitiesQ = runQuery("select * from " + siteDB + ".sites_celebs where celeb_id=" + str(eKey))
        while True:
            siteEntity = siteEntitiesQ.fetch_row(1, 1)
            if siteEntity == ():
                break
            if siteEntity[0]['site_id'] not in siteIds:
                siteIds.append(siteEntity[0]['site_id'])
                log.plog('appending ' + str(siteEntity[0]['site_id']) + ' to site IDs from entity ' + str(eKey), 2)

    #have now created a list of sites. Commit
    siteAssignments = []
    for siteId in siteIds:
        siteAssignments.append("site_" + str(siteId) + " = true")
        # for each site, check to see if the table newsroom_sites_x or subs_sites_x exists where x is site_id
        linkTable = storyTable + "_sites_" + str(siteId)
        linkTableExists = runQuery("SHOW TABLES from " + siteDB + " like '" + linkTable + "'")
        # a successful SHOW TABLES for a missing table is an empty result
        if linkTableExists == False or linkTableExists.num_rows() == 0:
            # if it doesn't exist, create it.
            runWrite("create table " + siteDB + "." + linkTable + " (sub_id int primary key) engine=innoDB")
        # insert sub_id into subs_sites_x
        sql = "INSERT INTO " + siteDB + "." + linkTable + " set sub_id=" + str(newSubId)
        log.plog(sql, 3)
        runWrite(sql)

    if len(siteIds) > 0:
        sql = "update " + siteDB + "." + storyTable + " set " + ','.join(siteAssignments) + " where sub_id=" + str(newSubId)
        log.plog(sql, 2)
        runWrite(sql)

    ##### add images #####
    log.plog('AS: adding images')
    log.plog("AS: maxSize: " + str(page['maxSize']) + "(" + str(site['imageMinSize']) + ") overrideImageMinSize: " + str(site['overrideImageMinSize']), 2)
    storyImageAdded = False
    # without a largestImage url there is nothing to fetch, so the entity
    # image fallback below is used instead
    if storyHasImage and 'largestImage' in page:
        log.plog("AS: largest image: " + page['largestImage'], 2)
        if 'imageSource' not in page or page['imageSource'] is None:
            page['imageSource'] = ''
            log.plog('AS: imageSource not in infoModule.')
        else:
            log.plog('AS: imageSource IS IN infoModule.')
            page['imageSource'] = sqlQuote(page['imageSource'])

        sql = "insert into " + siteDB + ".images set credit='" + page['imageSource'] + "', originalUrl='" + page['imageSource'] + "'"
        log.plog('AS: ' + sql)
        if writesEnabled:
            log.plog('AS: debug_mode NOT on')
            runQuery(sql)
            #new image id
            iid = site['dblink'].insert_id()
            sql = "update " + siteDB + "." + storyTable + " set image_id=" + str(iid) + ", imgsrc='U' where sub_id=" + str(newSubId)
            log.plog("AS: " + sql)
            runQuery(sql)

            escImage = urllib.quote(page['largestImage'])
            log.plog('AS: escImage: ' + escImage, 2)
            fetcherUrl = site['imageReceiver'] + '/imageFetcher.php?image_id=' + str(iid) + '&type=feed'
            if isFeaturedSource:
                fetcherUrl += '&topPick=true'
            fetcherUrl += '&url=' + escImage
            log.plog('AS: ' + fetcherUrl, 2)
            imageInsert = fetchUrl(fetcherUrl)
            log.plog('AS: ' + str(imageInsert), 2)

            #check that image insert was successful
            sql = "select * from " + siteDB + ".images where image_id=" + str(iid)
            log.plog('AS: ' + sql)
            chkImage = runQuery(sql).fetch_row(1, 1)
            if chkImage == () or chkImage[0]['sxsize'] == '0':
                log.plog('image import error!', 4)
                runQuery("delete from " + siteDB + ".images where image_id=" + str(iid))
                if isFeaturedSource:
                    # topPick itself is reset in the fallback below
                    log.plog('featured source image import error', 4)
            else:
                log.plog('AS: storyimageadded = true')
                storyImageAdded = True
        else:
            log.plog('AS: debug_mod on')

    if not storyImageAdded:
        #if no story image found and auto_featured source, do not feature
        if isFeaturedSource:
            runWrite("update " + siteDB + "." + storyTable + " set topPick=false where sub_id=" + str(newSubId))

        #no story image added, so pick top entity image
        entityImageSql = "select image_id from db_topics.celebs_images where celeb_id=" + str(entityForImage) + " order by seq limit 1"
        entityImage = runQuery(entityImageSql).fetch_row(1, 1)
        if entityImage == ():
            #no image
            log.plog("error could not get a result for " + entityImageSql, 3)
            sql = "update " + siteDB + "." + storyTable + " set imgsrc='S', image_id=0 where sub_id=" + str(newSubId)
        else:
            sql = "update " + siteDB + "." + storyTable + " set imgsrc='S', image_id=" + str(entityImage[0]['image_id']) + " where sub_id=" + str(newSubId)
        runWrite(sql)

    # store the outline again with the entities highlighted
    highlightedBody = highlightEntities.highlightEntitiesFromList(page['outline'], entityList, True)
    runWrite("update " + siteDB + "." + storyTable + " set outline='" + sqlQuote(highlightedBody) + "' where sub_id=" + str(newSubId))

    ### add to atom queue if in subs story table
    if storyTable == 'subs':
        runWrite("insert into " + siteDB + ".atom_queue set placed=now(), sub_id=" + str(newSubId))

    ### YAY! Done
    return newSubId
def setPrimo():
    ###################################################################
    ##func:  setPrimo
    ##param: none
    ##desc:  takes no params. Works on module state instead: reads
    ##       infoModule.info.page['title'], infoModule.info.page['outline'],
    ##       infoModule.info.source and infoModule.info.entityList, then
    ##       writes the 'primo' field ('Y', '2', '3', '4') of the entities
    ##       it ranks. Each entity's 'frequency' is also bumped once per
    ##       tagged mention found in title + outline.
    ##       Ranking precedence:
    ##         1. the entity tied to the source (source['celeb_id'])
    ##         2. entities positioned inside the title, earliest first
    ##         3. entities mentioned more than once, most frequent first
    ##         4. if nothing was ranked at all, the earliest entity
    ##       Steps 2-4 skip entities whose 'visibility' (as reported by
    ##       entityLib.entityLibrary) is 'invisible'.
    ##       Logs and returns early if the outline, the title or the
    ##       entity list is missing/empty.
    ##ret:   None
    ##auth:  esr
    ###################################################################
    page = infoModule.info.page
    entityList = infoModule.info.entityList

    if 'outline' not in page or page['outline'] == '':
        log.plog('no outline set before setPrimo called', 5)
        return
    if 'title' not in page or page['title'] == '':
        log.plog('no title set before setPrimo called', 5)
        return
    if len(entityList) == 0:
        log.plog('no entities set before setPrimo called', 5)
        return

    #highlight text as a way to count the frequency
    highlightedText = highlightEntities.highlightEntitiesFromList(
        page['title'] + ' ' + page['outline'], entityList)

    #tally the tags per lookup url first, so every distinct url costs a
    #single query instead of one query per mention
    urlOrder = []
    urlCounts = {}
    for lookupUrl in re.findall(r'\[celeb url=(.+?)\]', highlightedText):
        if lookupUrl not in urlCounts:
            urlOrder.append(lookupUrl)
            urlCounts[lookupUrl] = 0
        urlCounts[lookupUrl] += 1

    for lookupUrl in urlOrder:
        #the url is lifted out of page-derived text, so escape backslashes
        #and quotes before embedding it in the statement
        #NOTE(review): switch to a parameterized query if mysql_tools offers one
        escapedUrl = lookupUrl.replace('\\', '\\\\').replace("'", "\\'")
        sql = "select celeb_id from db_topics.celebs where lookupUrl='" + escapedUrl + "'"
        entityByLookupQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
        while True:
            entityByLookup = entityByLookupQ.fetch_row(1, 1)
            if entityByLookup == ():
                break
            taggedEntity = entityList[entityByLookup[0]['celeb_id']]
            #frequency may be None (the ranking below allows for it), so
            #treat an unset counter as zero rather than failing on None + int
            taggedEntity['frequency'] = (taggedEntity['frequency'] or 0) + urlCounts[lookupUrl]

    # now frequency is set, first two are based on position, next two based on frequency
    primoTypes = ['Y', '2', '3', '4', 'N']
    primoTypeCursor = 0
    #keys that already hold a rank, so a later pass cannot re-rank them
    alreadyRanked = set()

    #if source is associated with entity, it's auto-set to primo
    source = infoModule.info.source
    if 'celeb_id' in source and source['celeb_id'] != '' and int(source['celeb_id']) > 0:
        entityList[source['celeb_id']]['primo'] = primoTypes[primoTypeCursor]
        alreadyRanked.add(source['celeb_id'])
        primoTypeCursor += 1

    #candidates: [key, position] ascending and [key, frequency] descending
    posArray = []
    freqArray = []
    for eKey in entityList.keys():
        candidate = entityList[eKey]
        if candidate['position'] is not None:
            posArray.append([eKey, candidate['position']])
        if candidate['frequency'] is not None and candidate['frequency'] > 1:
            freqArray.append([eKey, candidate['frequency']])
    posArray.sort(key=lambda x: x[1])
    freqArray.sort(key=lambda x: x[1], reverse=True)

    titleLen = len(page['title'])

    #primo set by position in title
    for eKey, position in posArray:
        #positions are sorted, so the first one past the title ends the pass
        if primoTypeCursor >= 4 or position > titleLen:
            break
        #the source entity keeps the rank it was given above; without this
        #guard it would be overwritten with a lower rank and no 'Y' would remain
        if eKey in alreadyRanked:
            continue
        ## only non-hidden entities can be primo
        if entityLib.entityLibrary(eKey, 'visibility') != 'invisible':
            log.plog("entity %s found in title, setting to primo %s" % (eKey, primoTypes[primoTypeCursor]), 3)
            entityList[eKey]['primo'] = primoTypes[primoTypeCursor]
            alreadyRanked.add(eKey)
            primoTypeCursor += 1

    #primo set by frequency
    for eKey, _frequency in freqArray:
        if primoTypeCursor >= 4:
            break
        #only entities still at the unranked marker 'N' are eligible
        if entityList[eKey]['primo'] != 'N':
            continue
        ## only non-hidden entities can be primo
        if entityLib.entityLibrary(eKey, 'visibility') != 'invisible':
            log.plog("entity %s has high frequency, setting to primo %s" % (eKey, primoTypes[primoTypeCursor]), 3)
            entityList[eKey]['primo'] = primoTypes[primoTypeCursor]
            primoTypeCursor += 1

    #if nothing found, go to first position and set it to primo
    if primoTypeCursor == 0:
        for eKey, _position in posArray:
            ## only non-hidden entities can be primo
            if entityLib.entityLibrary(eKey, 'visibility') != 'invisible':
                log.plog("entity %s found first in story, setting to primo %s" % (eKey, primoTypes[primoTypeCursor]), 3)
                entityList[eKey]['primo'] = primoTypes[primoTypeCursor]
                primoTypeCursor += 1
                break
import highlightEntities import sys #link = mysql_tools.mysqlConnect('192.168.0.29', 'rw_failover', 'f@ilfa1l') link = mysql_tools.mysqlConnect('127.0.0.1', 'root', '') if link == False : print "no connection" sys.exit(0) infoModule.info.site['dblink'] = link infoModule.info.site['log_priority_threshold'] = 2 story = 'there has been much talk at gawker media about 2 much the new playstation 2 game called mr. poo poo playstation 2head 1423 is my number' relatedArray = ['86464', '5754224'] output = highlightEntities.highlightEntitiesFromList(story, relatedArray, True) print output sys.exit(0) #if len(sys.argv) > 1 and int(sys.argv[1]) > 0: #get specific sub_id #print "looking up sub_id: " + sys.argv[1] #randStoriesQ = mysql_tools.mysqlQuery("select sub_id, outline, title from db_celebrifi.subs where sub_id=" + sys.argv[1] + " limit 20", infoModule.info.site['dblink']) #else: #randStoriesQ = mysql_tools.mysqlQuery("select sub_id, outline, title from db_celebrifi.subs order by rand() limit 20", infoModule.info.site['dblink']) randStoriesQ = mysql_tools.mysqlQuery("select 19 as sub_id, 'Dallas Cowboy WR Patrick Craytons agent said that he will ask the Dallas Cowboys for the outright release of his client prior to the Saturday 53 man roster deadline according to the Dallas Morning News. Patrick Craytons agent told the newspaper, It seems he doesnt figure into their plans so its not logical to be on the roster until Saturday. We dont see the use in prolonging it....' as outline, 'Dallas Cowboys WR asks for release' as title", infoModule.info.site['dblink']) while True: randStory = randStoriesQ.fetch_row(1,1) if randStory == (): break if randStory[0]['outline'] == None: print "error, outline is empty " + randStory[0]['sub_id']