def getQuserOpenPackage(basePath='s3://datamining.ym/dmuser/ykang/results/qUserInLast5EachDay', beginDay='2016-01-24', interval_='30', isForward='0', s3DictBasePath='s3://datamining.ym/dmuser/ykang/data/spark.ouwan.qPackageToId', isDownload=True):
    """Aggregate per-package counts from daily gzip dumps and publish an id table.

    Steps:
      1. Optionally copy one S3 folder per day into the mission's local folder.
      2. Walk the local folder, read every ``.gz`` file as '|'-separated rows
         and sum ``int(row[2])`` per ``row[1]``, skipping rows whose ``row[0]``
         is one of the placeholder IMEI values in the mask below.
      3. Write ``key|total`` lines, largest total first, to
         ``qUserOpenPackage.txt`` in the local folder.
      4. Write ``key|sequentialId`` lines to ``Qpackage.QPACKAGE_ID_TXT`` and
         copy that file to ``s3DictBasePath/qPackageToId.txt``.

    Args:
        basePath: S3 prefix holding one sub-folder per day.
        beginDay: First day, forwarded unchanged to ``getDaysGen``.
        interval_: Number of days; converted with ``int()`` before being
            forwarded to ``getDaysGen`` (exact semantics live in that helper).
        isForward: Direction flag; converted with ``int()`` before being
            forwarded to ``getDaysGen``.
        s3DictBasePath: S3 prefix that receives ``qPackageToId.txt``.
        isDownload: When false, skip the S3 download and only process
            whatever is already in the local folder.

    Returns:
        dict mapping ``row[1]`` (presumably a package name -- confirm against
        the producer of these files) to the summed ``int(row[2])``.
    """
    mconf = MissionConf().setAppName('getQuserOpenPackage')
    msc = MissionContext(conf=mconf)
    _, appPath = msc.getFolder()

    if isDownload:
        for theDay in getDaysGen(beginDay, int(interval_), int(isForward)):
            BashUtil.s3Cp(os.path.join(basePath, theDay), appPath + os.sep + theDay, recursived=True)

    # Placeholder / obviously fake device ids whose rows must not be counted.
    mask = {
        'imei=333333333333333': 1,
        'imei=123456789abcdef': 1,
        'imei=111111111111111': 1,
        'imei=012345678912345': 1,
        'imei=000000000000000': 1,
        'imei=00000000000000': 1,
    }

    openPackage = {}
    for (dirname, _, files) in os.walk(appPath):
        # Single-argument call form prints the same text as the statement form.
        print(dirname)
        for gzfile in files:
            if os.path.splitext(gzfile)[1] != '.gz':
                continue
            f = SepFile('|')
            f.open(dirname + os.sep + gzfile, mode='gzip', flag='rb')
            try:
                for line in f:
                    if line[0] in mask:
                        continue
                    openPackage[line[1]] = openPackage.get(line[1], 0) + int(line[2])
            finally:
                # Release the handle even when a malformed row makes int() raise.
                f.close()

    print('sorting')
    # sorted() is stable (also with reverse=True), so ties keep dict iteration
    # order -- the same result as sorting an index permutation by count.
    ranked = sorted(openPackage.items(), key=lambda item: item[1], reverse=True)
    print('sorted')

    writer = LineFile()
    writer.open(os.path.join(appPath, 'qUserOpenPackage.txt'), mode='txt', flag='w')
    try:
        for key, value in ranked:
            writer.writeLine(key + '|' + str(value))
    finally:
        writer.close()

    # qUserOpenPackageToOpenTimes could be written to this location instead:
    # Qpackage.QPACKAGE_ID_TXT
    idFile = LineFile().open(Qpackage.QPACKAGE_ID_TXT, mode='txt', flag='w')
    try:
        # Ids follow dict iteration order, not the ranking above.
        for packageId, qPackage in enumerate(openPackage):
            idFile.writeLine(qPackage + '|' + str(packageId))
    finally:
        idFile.close()

    BashUtil.s3Cp(Qpackage.QPACKAGE_ID_TXT, dst=os.path.join(s3DictBasePath, 'qPackageToId.txt'), recursived=False)
    return openPackage
def writeQuserToId():
    """Assign a sequential integer id to every distinct user name.

    Reads ','-separated rows from ``Quser.TOTAL_QUSER_TXT``. Each row yields
    one user name built from the first field, or from the second field when
    the first is empty and the second is not. The distinct names are then
    written to ``Quser.QUSER_ID_TXT`` as ``name|id`` lines, with ids counting
    up from 0 in set iteration order.

    NOTE: a second ``writeQuserToId`` definition (taking ``mask`` and
    ``inOrOut``) follows this one in the file and rebinds the name.
    """
    source = SepFile(',').open(Quser.TOTAL_QUSER_TXT, 'txt', 'r')
    distinct = set()
    for fields in source:
        name = '******' + fields[0]
        if not len(fields[0]) and len(fields[1]):
            # First field is empty: fall back to the second one.
            name = '******' + fields[1]
        distinct.add(name)
    ordered = list(distinct)
    source.close()

    sink = LineFile().open(Quser.QUSER_ID_TXT, 'txt', 'w')
    for position, name in enumerate(ordered):
        sink.writeLine(name + '|' + str(position))
    sink.close()
def writeQuserToId(mask=None, inOrOut=True):
    """Assign a sequential integer id to every distinct (optionally filtered) user.

    Reads ','-separated rows from ``Quser.TOTAL_QUSER_TXT``. Each row yields
    one user name built from the first field, or from the second field when
    the first is empty and the second is not. The retained distinct names are
    written to ``Quser.QUSER_ID_TXT`` as ``name|id`` lines, with ids counting
    up from 0 in set iteration order.

    Args:
        mask: Optional container supporting ``in``. ``None`` disables
            filtering and keeps every user name.
        inOrOut: Only used when ``mask`` is given. Truthy keeps the names
            that are in ``mask``; falsy keeps the names that are not.

    Returns:
        None. The result is the file written to ``Quser.QUSER_ID_TXT``.
    """
    qUser = set()
    source = SepFile(',').open(Quser.TOTAL_QUSER_TXT, 'txt', 'r')
    try:
        for line in source:
            username = '******' + line[0]
            if len(line[0]) == 0 and len(line[1]) != 0:
                # First field is empty: fall back to the second one.
                username = '******' + line[1]

            # Identity test: ``== None`` would call a custom __eq__ on mask.
            if mask is None:
                keep = True
            elif inOrOut:
                keep = username in mask
            else:
                keep = username not in mask

            if keep:
                qUser.add(username)
    finally:
        # Release the input handle even if a malformed row raises.
        source.close()

    sink = LineFile().open(Quser.QUSER_ID_TXT, 'txt', 'w')
    try:
        for userId, username in enumerate(qUser):
            sink.writeLine(username + '|' + str(userId))
    finally:
        sink.close()