from multiprocessing import Process,Pipe
import syslog
import sys
import os

from lib import sorting, wikicount

if __name__ == '__main__':
    # Entry point of the sortMongoHD job: drop the first line of the exported
    # CSV, then load every remaining row into SORTME as a
    # (first column, second column without double quotes) tuple.
    STARTTIME = wikicount.fnStartTimer()
    syslog.syslog('sortMongoHD.csv: starting...')
    DAY, MONTH, YEAR, HOUR, expiretime = wikicount.fnReturnTimes()
    wikicount.fnSetStatusMsg('sortMongoHD', 0)
    # `sed -i 1d` deletes line 1 of the file in place (presumably the CSV
    # header row -- confirm against the export step).
    os.system("sed -i 1d /home/ec2-user/mongo.csv")
    # Presumably raised for the recursive sort that consumes SORTME -- confirm.
    sys.setrecursionlimit(2000)
    n = 7  # number partitions to break into
    SORTME = []
    # The context manager closes the file even when a malformed row (for
    # example one without a comma) makes the parsing below raise.
    with open("/home/ec2-user/mongo.csv", "r") as IFILE:
        for line in IFILE:
            # NOTE(review): strip('"') only removes quotes at the two ends of
            # the raw line, and the raw line still ends with its newline, so
            # the first field can keep a trailing quote and HASH keeps the
            # trailing "\n". Left unchanged because downstream consumers are
            # not visible here.
            line = line.strip('"').split(',')
            HASH = line[1].replace("\"", "")
            rec = (line[0], HASH)
            SORTME.append(rec)

    print('done reading list .... starting mulitple procs')
# Disabled hand-off of SORTME to the multi-process quicksort:
#    pconn,cconn=Pipe()
#    lyst=[]
#    p=Process(target=sorting.QuickSortMPListArray,args=(SORTME,cconn,n))
#    p.start()
#    print 'main proc started'
import syslog
from pymongo import Connection
from lib import wikicount

# Script body of the 'threehrrollingavg' job (name taken from the status
# message set below): opens the `wc` Mongo database, derives three formatted
# hour values, loads the ids stored in the `spam` collection, then walks the
# language list.
# Path constant; not referenced anywhere in the visible part of this script.
EXCEPTIONFILE = '/tmp/zExecption.log'
STARTTIME = wikicount.fnStartTimer()
conn = Connection()
db = conn.wc
DAY, MONTH, YEAR, HOUR, expiretimes = wikicount.fnReturnTimes()
# HOUR is converted to int and passed through minusHour (presumably "one hour
# earlier" -- confirm in lib.wikicount), then expanded into three hour values.
HOUR = wikicount.minusHour(int(HOUR))
HOUR, HOUR2, HOUR3 = wikicount.fnReturnLastThreeHours(HOUR)
# Each hour value is run through fnStrFmtDate; the resulting format is defined
# in lib.wikicount and is not visible here.
HOUR = wikicount.fnStrFmtDate(HOUR)
HOUR2 = wikicount.fnStrFmtDate(HOUR2)
HOUR3 = wikicount.fnStrFmtDate(HOUR3)
# Placeholder value; replaced below by the distinct `_id` values of `spam`.
SPAMLIST = []
# Status code 0 presumably marks the job as started -- confirm.
wikicount.fnSetStatusMsg('threehrrollingavg', 0)
SPAMCURSOR = db['spam'].find()
SPAMLIST = SPAMCURSOR.distinct('_id')
# These three are re-initialised at the top of every loop iteration below.
hourlies = []
TypeErrors = 0
KeyErrors = 0
z = 1
LANGUAGES = wikicount.LList
for lang in LANGUAGES:
    # Reset the per-language accumulators and error counters.
    hourlies = []
    KeyErrors = 0
    TypeErrors = 0
    # Per-language names built from the language code (presumably Mongo
    # collection names -- confirm).
    # NOTE(review): none of these four names is used in the visible lines; the
    # remainder of the loop body appears to be missing from this view.
    hhdTABLE = str(lang)+"_hitshourlydaily"
    hdTABLE = str(lang)+"_hitsdaily"
    outTABLE = str(lang)+"_threehour"
    lastTABLE = str(lang)+"_lastrollavg"
# Example #3
# 0
from lib import wikicount


# Setup for the 'tophits' job (name taken from the log and status messages
# below): starts the timer, reads the current date parts, opens the `wc` Mongo
# database and fetches the language list.
STARTTIME= wikicount.fnStartTimer()
syslog.syslog('tophits.py:  starting...')
DAY,MONTH,YEAR,HOUR,expiretime= wikicount.fnReturnTimes()
# DAY, MONTH and HOUR are replaced by whatever fnFormatTimes returns for them
# (presumably zero-padded strings -- confirm in lib.wikicount).
DAY,MONTH,HOUR= wikicount.fnFormatTimes(DAY,MONTH,HOUR)
MONTHNAME= wikicount.fnGetMonthName()
# int() is applied to HOUR before it is handed to minusHour.
HOUR= wikicount.minusHour(int(HOUR))
conn=Connection()
db=conn.wc
RECCOUNT=1
# Key of the form YEAR_MONTH_DAY built from the values above.
DAYKEY=str(YEAR)+"_"+str(MONTH)+"_"+str(DAY)
# Initial value only; it is not read before the next visible assignment to
# PLACEMAP.
PLACEMAP="hitsplacemap"

# Status code 0 presumably marks the job as started -- confirm.
wikicount.fnSetStatusMsg('tophits',0)

LANGLIST= wikicount.getLanguageList()
# Per-language pass: open that language's sorted CSV and start reading rows.
# NOTE(review): the loop body is cut off in this view -- nothing visible
# consumes RESULT, PLACEMAP or HITSMAP, or advances RECCOUNT.
for lang in LANGLIST:
	PLACEMAP=str(lang)+"_mapPlace"
	HITSMAP=str(lang)+"_mapHits"
	try:
		IFILE=open("/home/ec2-user/"+str(lang)+"_mongo.csv.sorted","r")
	except IOError:
		# A language whose file cannot be opened is logged and skipped.
		syslog.syslog("Error opening file for "+str(lang))
		continue
	RESULT=[]
	RECCOUNT=0
	# Rows are split on commas while RECCOUNT is below 1000.
	# NOTE(review): the next two lines indent with a mix of spaces and tabs,
	# which Python 3 rejects as inconsistent -- normalise once the full body
	# is available.
	for line in IFILE:
    		if RECCOUNT < 1000:
        		line=line.strip().split(",")
        delta=item['Hits']-YHits
        print 'hello!'
        nameq=db.hitsdaily.find({'_id':item['_id']})
        NEWPOST={'id':item['_id'],'delta':int(delta),'orPlace':item['place'],'title':nameq['title']}
        db.tmpHot.insert(NEWPOST)
        return


# Driver for the fillTmpHot job: empties db.tmpHot, reads every document of
# today's 'tophits<YEAR>_<MONTH>_<DAY>' collection and hands the cursor to
# FillTmpHot, then logs the runtime and launches the next job.
STARTTIME = wikicount.fnStartTimer()
wikicount.toSyslog('filltmpHot.py : starting...')
DAY, MONTH, YEAR, HOUR, expiretime = wikicount.fnReturnTimes()
DAYKEY = '_'.join([str(YEAR), str(MONTH), str(DAY)])
COLLECTIONNAME = 'tophits' + DAYKEY
conn = Connection()
db = conn.wc
RECCOUNT = 1
NUMRECS = 250  # page size used only by the disabled paged queries below
wikicount.fnSetStatusMsg('fillTmpHot', 0)
print(COLLECTIONNAME)
# Clear the previous run's rows before refilling.
db.tmpHot.remove()
RESULT = db[COLLECTIONNAME].find()
# Disabled alternative: four pages of NUMRECS documents each.
#RESULT1=db[COLLECTIONNAME].find().limit(NUMRECS).skip(0)
#RESULT2=db[COLLECTIONNAME].find().limit(NUMRECS).skip(NUMRECS)
#RESULT3=db[COLLECTIONNAME].find().limit(NUMRECS).skip(NUMRECS*2)
#RESULT4=db[COLLECTIONNAME].find().limit(NUMRECS).skip(NUMRECS*3)
FillTmpHot(RESULT)
RUNTIME = wikicount.fnEndTimerCalcRuntime(STARTTIME)
# NOTE(review): this message names 'prepop_filltmpHot.py' while the start
# message names 'filltmpHot.py'; kept as-is because log consumers may match it.
wikicount.toSyslog(
    'prepop_filltmpHot.py:  runtime is ' + str(RUNTIME) + ' seconds.'
)
wikicount.fnSetStatusMsg('fillTmpHot', 3)
wikicount.fnLaunchNextJob('fillTmpHot')