def getQuserOpenPackage(basePath='s3://datamining.ym/dmuser/ykang/results/qUserInLast5EachDay',
                        beginDay='2016-01-24',
                        interval_='30',
                        isForward='0',
                        s3DictBasePath='s3://datamining.ym/dmuser/ykang/data/spark.ouwan.qPackageToId',
                        isDownload=True):
    """Aggregate per-key counts from gzip files and publish a key-to-id dictionary.

    Steps:
      1. Optionally copy one S3 folder per day (as produced by getDaysGen)
         from ``basePath`` into the mission's local folder.
      2. Walk that folder, read every ``*.gz`` file as '|'-separated records
         and sum ``int(line[2])`` per ``line[1]``, skipping records whose
         ``line[0]`` is one of the masked placeholder values.
      3. Write ``key|total`` lines, highest total first, to
         ``qUserOpenPackage.txt`` inside the local folder.
      4. Write ``key|id`` lines to ``Qpackage.QPACKAGE_ID_TXT`` and copy that
         file to ``<s3DictBasePath>/qPackageToId.txt``.

    Args:
        basePath: S3 prefix that contains one sub-folder per day.
        beginDay: first day, passed unchanged to getDaysGen.
        interval_: number of days; converted with int() before use.
        isForward: direction flag; converted with int() before use.
        s3DictBasePath: S3 prefix that receives ``qPackageToId.txt``.
        isDownload: when false, the S3 download step is skipped and whatever
            is already in the local folder is processed.

    Returns:
        dict mapping each ``line[1]`` value to its summed count.

    NOTE(review): field names suggest line[0] is a device id, line[1] a
    package name and line[2] an open count -- confirm against the producer
    of these files.
    """
    mconf = MissionConf().setAppName('getQuserOpenPackage')
    msc = MissionContext(conf=mconf)
    [_, appPath] = msc.getFolder()
    if isDownload:
        for theDay in getDaysGen(beginDay, int(interval_), int(isForward)):
            BashUtil.s3Cp(os.path.join(basePath, theDay), appPath + os.sep + theDay, recursived=True)

    # Placeholder first-field values whose records must not be counted.
    mask = frozenset([
        'imei=333333333333333',
        'imei=123456789abcdef',
        'imei=111111111111111',
        'imei=012345678912345',
        'imei=000000000000000',
        'imei=00000000000000',
    ])

    openPackage = {}
    for (dirPath, _, files) in os.walk(appPath):
        print(dirPath)
        for gzfile in files:
            if os.path.splitext(gzfile)[1] != '.gz':
                continue
            f = SepFile('|')
            f.open(dirPath + os.sep + gzfile, mode='gzip', flag='rb')
            # Close the reader even if a malformed record raises.
            try:
                for line in f:
                    if line[0] not in mask:
                        openPackage[line[1]] = openPackage.get(line[1], 0) + int(line[2])
            finally:
                f.close()

    print('sorting')
    # sorted() is stable, also with reverse=True, so keys with equal totals
    # keep the dictionary's iteration order.
    ranked = sorted(openPackage.items(), key=lambda item: item[1], reverse=True)
    print('sorted')

    writer = LineFile()
    writer.open(os.path.join(appPath, 'qUserOpenPackage.txt'), mode='txt', flag='w')
    try:
        for key, value in ranked:
            writer.writeLine(key + '|' + str(value))
    finally:
        writer.close()

    # The key -> open-times data could also be written to
    # Qpackage.QPACKAGE_ID_TXT; for now only the key -> id mapping goes there.
    # Ids follow the dictionary's iteration order, not the ranking above.
    f = LineFile().open(Qpackage.QPACKAGE_ID_TXT, mode='txt', flag='w')
    try:
        for index, qPackage in enumerate(openPackage):
            f.writeLine(qPackage + '|' + str(index))
    finally:
        f.close()

    BashUtil.s3Cp(Qpackage.QPACKAGE_ID_TXT, dst=os.path.join(s3DictBasePath, 'qPackageToId.txt'), recursived=False)

    return openPackage
示例#2
0
 def writeQuserToId():
     """Give every distinct user name a numeric id and store the mapping.

     Reads Quser.TOTAL_QUSER_TXT as ','-separated records, builds one user
     name per record (from the first field, or from the second field when
     the first is empty and the second is not), de-duplicates the names and
     writes ``name|id`` lines to Quser.QUSER_ID_TXT.

     Ids are positions in a list built from a set, so which name gets which
     id is arbitrary.

     NOTE(review): both prefixes below are the literal '******'; they look
     redacted -- confirm the intended prefix strings.
     """
     names = set()
     reader = SepFile(',').open(Quser.TOTAL_QUSER_TXT, 'txt', 'r')
     # Close the input even if a record is malformed.
     try:
         for line in reader:
             username = '******' + line[0]
             if len(line[0]) == 0 and len(line[1]) != 0:
                 # First field is empty: fall back to the second field.
                 username = '******' + line[1]
             names.add(username)
     finally:
         reader.close()

     writer = LineFile().open(Quser.QUSER_ID_TXT, 'txt', 'w')
     try:
         for userId, username in enumerate(list(names)):
             writer.writeLine(username + '|' + str(userId))
     finally:
         writer.close()
示例#3
0
文件: Quser.py 项目: KeyKy/look-alike
 def writeQuserToId(mask=None, inOrOut=True):
     """Give every selected distinct user name a numeric id and store the mapping.

     Reads Quser.TOTAL_QUSER_TXT as ','-separated records, builds one user
     name per record (from the first field, or from the second field when
     the first is empty and the second is not), filters the names with
     ``mask``, de-duplicates them and writes ``name|id`` lines to
     Quser.QUSER_ID_TXT.

     Args:
         mask: container supporting ``in``; None disables filtering.
         inOrOut: with a mask, a true value keeps only names found in the
             mask and a false value keeps only names absent from it.

     Ids are positions in a list built from a set, so which name gets which
     id is arbitrary.

     NOTE(review): both prefixes below are the literal '******'; they look
     redacted -- confirm the intended prefix strings.
     """
     names = set()
     reader = SepFile(',').open(Quser.TOTAL_QUSER_TXT, 'txt', 'r')
     # Close the input even if a record is malformed.
     try:
         for line in reader:
             username = '******' + line[0]
             if len(line[0]) == 0 and len(line[1]) != 0:
                 # First field is empty: fall back to the second field.
                 username = '******' + line[1]

             # Identity test: "== None" would invoke a custom __eq__ on mask.
             if mask is None:
                 keep = True
             elif inOrOut:
                 keep = username in mask
             else:
                 keep = username not in mask
             if keep:
                 names.add(username)
     finally:
         reader.close()

     writer = LineFile().open(Quser.QUSER_ID_TXT, 'txt', 'w')
     try:
         for userId, username in enumerate(list(names)):
             writer.writeLine(username + '|' + str(userId))
     finally:
         writer.close()
示例#4
0
    def writeQuserToId(mask=None, inOrOut=True):
        """Give every selected distinct user name a numeric id and store the mapping.

        Reads Quser.TOTAL_QUSER_TXT as ','-separated records, builds one user
        name per record (from the first field, or from the second field when
        the first is empty and the second is not), filters the names with
        ``mask``, de-duplicates them and writes ``name|id`` lines to
        Quser.QUSER_ID_TXT.

        Args:
            mask: container supporting ``in``; None disables filtering.
            inOrOut: with a mask, a true value keeps only names found in the
                mask and a false value keeps only names absent from it.

        Ids are positions in a list built from a set, so which name gets
        which id is arbitrary.

        NOTE(review): both prefixes below are the literal '******'; they look
        redacted -- confirm the intended prefix strings.
        """
        names = set()
        reader = SepFile(',').open(Quser.TOTAL_QUSER_TXT, 'txt', 'r')
        # Close the input even if a record is malformed.
        try:
            for line in reader:
                username = '******' + line[0]
                if len(line[0]) == 0 and len(line[1]) != 0:
                    # First field is empty: fall back to the second field.
                    username = '******' + line[1]

                # Identity test: "== None" would invoke a custom __eq__ on mask.
                if mask is None:
                    keep = True
                elif inOrOut:
                    keep = username in mask
                else:
                    keep = username not in mask
                if keep:
                    names.add(username)
        finally:
            reader.close()

        writer = LineFile().open(Quser.QUSER_ID_TXT, 'txt', 'w')
        try:
            for userId, username in enumerate(list(names)):
                writer.writeLine(username + '|' + str(userId))
        finally:
            writer.close()