#!/usr/bin/env python
import sys
from sets import Set

true_dups = Set()
reported_dups = Set()

if len(sys.argv) < 3:
    print "Usage: python check.py reported_duplicates true_duplicates"
    exit(-1)

reported_duplicates_file = sys.argv[1]
true_duplicates_file = sys.argv[2]

with open(true_duplicates_file, "r") as inf:
    for line in inf:
        true_dups.add(line.strip())

with open(reported_duplicates_file, "r") as inf:
    for line in inf:
        reported_dups.add(line.strip())

tp, fp, fn = 0, 0, 0
for pair in reported_dups:
    if pair in true_dups:
        tp += 1
    else:
        fp += 1
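# Hedged completion (the original snippet ends after the tp/fp loop):
# count false negatives and report precision/recall.
for pair in true_dups:
    if pair not in reported_dups:
        fn += 1

precision = float(tp) / (tp + fp) if (tp + fp) > 0 else 0.0
recall = float(tp) / (tp + fn) if (tp + fn) > 0 else 0.0
print "precision=%f recall=%f (tp=%d fp=%d fn=%d)" % (precision, recall, tp, fp, fn)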
#!/opt/bb/bin/python
import sys
from sets import Set

program = {}
counts = Set()

# input lines look like "name (weight) -> child1, child2, ..."
for line in sys.stdin:
    line = line.rstrip('\n').split(")")
    name = line[0].split()[0]
    weight = int(line[0].split("(")[1])
    words = line[1].split(", ")
    if words == ['']:
        words = []
    else:
        words[0] = words[0][4:]
    program[name] = {'weight': weight, 'words': words, 'total': 0}
    for w in words:
        counts.add(w)

# the base of the tower is the one program nobody else carries
base = ""
for p in program.keys():
    if p not in counts:
        base = p
        break
print base

problem = 0


def getWeight(name):
    p = program[name]
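    # Hedged completion of the truncated function (the original stops after
    # "p = program[name]"): recursively total a program's own weight plus the
    # weights of everything it carries, caching the result in 'total'.
    total = p['weight']
    for w in p['words']:
        total += getWeight(w)
    p['total'] = total
    return total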
import datetime
import itertools
import operator
import time as timer

import psycopg2
from sets import Set

# NOTE: this function is an excerpt; it also relies on module-level state not
# shown in the snippet: POIDict, FBCategoryPOICountDict, FB_ATDict,
# RelationDict, FBsInfIdxDict, and FBsInfMatrix.


def core3(swLng, swLat, neLng, neLat, selectUid, rawkeywords, stime, etime):
    startTime = timer.time()
    timelist = []
    keywords = rawkeywords.split(',')

    def KM(k, p):
        # keyword-match score: tf-idf against the POI's category, weighted
        # by the category's attractiveness score
        category = POIDict[p]['category']
        if k == category:
            tf = 1
        else:
            return 0
        idf = FBCategoryPOICountDict[k]
        AT = FB_ATDict[k]
        return (tf / float(idf)) * AT

    def cal_re_in(select_rid, tuples):
        # build a result tuple for a reconstructed route
        Total_PATS = 0.0
        Total_Timescore = 0.0
        Total_SocialINF = 0.0
        Total_KM = 0.0
        AllPOI = []
        if len(tuples) <= POILength:
            return
        for t in tuples:
            Total_PATS += t[2]
            Total_Timescore += t[3]
            Total_SocialINF += t[4]
            pid = t[0]
            time = t[1]
            lat = POIDict[pid]['latitude']
            lng = POIDict[pid]['longitude']
            name = POIDict[pid]['name']
            category = POIDict[pid]['category']
            link = POIDict[pid]['link']
            likes = POIDict[pid]['likes']
            checkins = POIDict[pid]['checkins']
            for keyword in keywords:
                Total_KM += KM(keyword, pid)
            POITuple = {
                'pid': pid, 'time': time, 'coor': [lat, lng], 'name': name,
                'category': category, 'link': link, 'likes': likes,
                'checkins': checkins
            }
            AllPOI.append(POITuple)
        rScore = Total_PATS + Total_Timescore + Total_SocialINF
        POICount = len(tuples)
        avg_rScore = float(rScore) / POICount
        tup = (uid, orignal_rid, select_rid, rScore, avg_rScore,
               0, 0, 0, 0, 0, 0, 0,
               Total_PATS, Total_Timescore, Total_SocialINF,
               float(Total_PATS) / POICount,
               float(Total_Timescore) / POICount,
               float(Total_SocialINF) / POICount,
               0, 0, AllPOI, Total_KM)
        return tup

    def getMinAndMax(lats, longs):
        return str(min(lats)), str(min(longs)), str(max(lats)), str(max(longs))

    def splitIntoHeadDict(route):
        # index consecutive (head, tail) POI pairs of a route by head pid
        length = len(route)
        for i in xrange(length):
            if i == length - 1:
                break
            headID = route[i][0]
            tailID = route[i + 1][0]
            head = POIDataDict[headID]
            tail = POIDataDict[tailID]
            if head[0] == tail[0] or head[1] >= tail[1]:
                continue
            if head[0] not in splitDict:
                splitDict[head[0]] = Set([])
            splitDict[head[0]].add((head, tail))

    def flushStack():
        # The original repeated this block verbatim in four places inside
        # construct(); it is factored out here without changing behavior.
        # Emit the route currently on tempStack if it is long enough, is not
        # already covered, and scores better than the original route with the
        # same POI sequence.
        global reconstructionIdx
        if len(tempStack) <= 1:
            return
        x_p = []
        score = 0.0
        for t in tempStack:
            x_p.append(t[0])
            score += (t[2] + t[3] + t[4])
        if tuple(x_p) in prefixSet:
            return
        # make prefix: everything but the last POI
        prefixSet.add(tuple(x_p[:-1]))
        if tuple(x_p) in reconstructionInput_p:
            return
        if tuple(x_p) in orignalRouteScore and \
                score <= orignalRouteScore[tuple(x_p)]:
            return
        r_rid = 'Reconstruct_' + str(reconstructionIdx)
        reconstructionOutput.append(cal_re_in(r_rid, tempStack))
        reconstructionIdx += 1
        reconstructionInput_p.add(tuple(x_p))

    def construct(pairSet):
        # depth-first extension of (head, tail) pairs into candidate routes
        endflag = False
        global construct_c
        global reconstructionIdx
        for i in pairSet:
            if len(tempStack) == 0:
                tempStack.append(i[0])
                tempStack.append(i[1])
            else:
                idx = len(tempStack) - 1
                if i[1][1] > tempStack[idx][1] and i[1][0] != tempStack[idx][0]:
                    tempStack.append(i[0])
                    tempStack.append(i[1])
                else:
                    tempStack.append(i[0])
                    endflag = True
            headSet = splitDict.get(i[0][0], None)  # computed but unused in the original
            tailSet = splitDict.get(i[1][0], None)
            if tailSet is not None and endflag == False:
                tempStack.pop()
                construct(tailSet)
                flushStack()
                tempStack.pop()
            elif endflag == True:
                flushStack()
            else:
                if len(tempStack) > 1:
                    flushStack()
                    for _ in xrange(2):
                        tempStack.pop()
                    flushStack()

    # Restored from the original's commented-out helper: it is needed by the
    # skyline step below.
    def if_dominate(check, test):
        if check == test:
            return True
        for i in xrange(len(check)):
            if check[i] > test[i]:
                return True
        return False

    pro = 0.1
    POILength = 2

    conn_string = ("host='192.168.100.200' dbname='moonorblue' "
                   "user='******' password='******'")
    conn = psycopg2.connect(conn_string)
    cur = conn.cursor()
    qByRegion = ("SELECT poi,rid FROM fb_route WHERE geom && st_makeenvelope("
                 + str(swLng) + "," + str(swLat) + "," + str(neLng) + ","
                 + str(neLat) + ",4326) AND st_area(geom) != 0 AND "
                 "(st_area(st_intersection(geom,st_makeenvelope("
                 + str(swLng) + "," + str(swLat) + "," + str(neLng) + ","
                 + str(neLat) + ",4326)))) != 0;")
    cur.execute(qByRegion)
    qByRegion_rows = [r for r in cur]
    timelist.append('Query:' + str(timer.time() - startTime))
    startTime = timer.time()

    orignalCategory = Set([])
    orignalPOI = Set([])
    minlong = float(swLng)
    minlat = float(swLat)
    maxlong = float(neLng)
    maxlat = float(neLat)
    uid = str(selectUid)
    orignal_rid = 0
    fids = Set(RelationDict.get(uid, []))

    reconstruction_start = timer.time()
    splitDict = {}
    reconstructionInput = []
    reconstructionOutput = []
    reconstructionOutputSet = Set([])
    global reconstructionIdx
    reconstructionIdx = 0
    scoreD = {}
    POIScoreDict = {}
    POIDataDict = {}
    c = 0
    qCount = 0
    for r in qByRegion_rows:
        # break if there are too many results
        if qCount > 10000:
            break
        POIs = eval(r[0])
        rid = r[1]
        localInput = []
        score = 0.0
        for POI in POIs:
            pid = POI['pid']
            latitude = POIDict[pid]['latitude']
            longitude = POIDict[pid]['longitude']
            if latitude >= minlat and latitude <= maxlat and \
                    longitude >= minlong and longitude <= maxlong:
                PATS = POI['PATS']
                timescore = POI['timeScore']
                socialINF = 0.0
                KMs = 0.0
                time = int(datetime.datetime.fromtimestamp(
                    float(POI['time'])).strftime('%H')) + 8
                if time > 24:
                    time = time - 24
                category = POIDict[pid]['category']
                visiters = Set(POIDict[pid]['visiters'])
                # sum the social influence score over visiting friends
                for v in visiters:
                    if str(v) not in fids:
                        continue
                    new_u = FBsInfIdxDict[uid]
                    new_f = FBsInfIdxDict[v]
                    scores = FBsInfMatrix[new_u][new_f]
                    socialINF += float(scores)
                for keyword in keywords:
                    KMs += KM(keyword, pid)
                score += (PATS + timescore + socialINF + KMs)
                localInput.append(
                    (pid, time, PATS, timescore, socialINF, category))
                if pid not in POIScoreDict:
                    POIScoreDict[pid] = PATS + timescore + socialINF + KMs
                    POIDataDict[pid] = (pid, time, PATS, timescore,
                                        socialINF, category)
                elif (PATS + timescore + socialINF + KMs) > POIScoreDict[pid]:
                    POIDataDict[pid] = (pid, time, PATS, timescore,
                                        socialINF, category)
            else:
                continue
        scoreD[c] = score
        c += 1
        qCount += 1
        reconstructionInput.append(tuple(localInput))
    timelist.append('POI:' + str(timer.time() - startTime))
    startTime = timer.time()

    sorted_scoreD = sorted(scoreD.items(), key=operator.itemgetter(1),
                           reverse=True)
    limit = pro * len(sorted_scoreD)
    chosedInput = []
    reconstructionInput_p = []
    orignalRouteScore = {}
    for i in xrange(int(limit)):
        chosedInput.append(sorted_scoreD[i][0])
    for i in chosedInput:
        splitIntoHeadDict(reconstructionInput[i])
        t = []
        score = 0.0
        for x in reconstructionInput[i]:
            t.append(x[0])
            score += (x[2] + x[3] + x[4])
        orignalRouteScore[tuple(t)] = score
    reconstructionInput_p = Set(reconstructionInput_p)
    prefixSet = Set()
    for i in splitDict:
        tempStack = []
        construct_c = 0
        construct(splitDict[i])
    timelist.append('Construct:' + str(timer.time() - startTime))
    startTime = timer.time()

    routeList = []
    result = []
    qCoverTime = 0.0
    ScoringTime = 0.0
    ProcessTime = 0.0
    qCount = 0
    for row_r in qByRegion_rows:
        # break if there are too many results
        if qCount > 10000:
            break
        process_start_time = timer.time()
        select_rid = row_r[1]
        if orignal_rid == select_rid:
            continue
        cover = 0
        POIs = eval(row_r[0])
        if len(POIs) <= POILength:
            continue
        rScore = 0.0
        recommendCategory = Set([])
        socialFlag = False
        POICount = len(POIs)
        recommendPOI = Set([])
        scoring_start_time = timer.time()
        Total_PATS = 0.0
        Total_Timescore = 0.0
        Total_SocialINF = 0.0
        Total_KM = 0.0
        AllPOI = []
        for POI in POIs:
            pid = POI['pid']
            PATS = POI['PATS']
            timescore = POI['timeScore']
            time = int(datetime.datetime.fromtimestamp(
                float(POI['time'])).strftime('%H')) + 8
            if time > 24:
                time = time - 24
            socialINF = 0.0
            latitude = POIDict[pid]['latitude']
            longitude = POIDict[pid]['longitude']
            if latitude >= minlat and latitude <= maxlat and \
                    longitude >= minlong and longitude <= maxlong:
                visiters = Set(POIDict[pid]['visiters'])
                # sum the social influence score over visiting friends
                for v in visiters:
                    if str(v) not in fids:
                        continue
                    new_u = FBsInfIdxDict[uid]
                    new_f = FBsInfIdxDict[v]
                    scores = FBsInfMatrix[new_u][new_f]
                    socialFlag = True
                    socialINF += float(scores)
                Total_PATS += PATS
                Total_Timescore += timescore
                Total_SocialINF += socialINF
                for keyword in keywords:
                    Total_KM += KM(keyword, pid)
                pScore = PATS + timescore + socialINF
                rScore += pScore
                name = POIDict[pid]['name']
                category = POIDict[pid]['category']
                link = POIDict[pid]['link']
                likes = POIDict[pid]['likes']
                checkins = POIDict[pid]['checkins']
                POITuple = {
                    'pid': pid, 'time': time, 'coor': [latitude, longitude],
                    'name': name, 'category': category, 'link': link,
                    'likes': likes, 'checkins': checkins
                }
                AllPOI.append(POITuple)
        avg_rScore = float(rScore) / POICount
        poi_hitCount = 0
        poiHit = 0
        editdistance = 0
        hitCount = 0
        categoryHit = 0
        ScoringTime = 0
        ProcessTime = 0
        reconstructionFlag = False
        tup = (uid, orignal_rid, select_rid, rScore, avg_rScore, categoryHit,
               cover, poi_hitCount, poiHit, socialFlag, ScoringTime,
               ProcessTime, Total_PATS, Total_Timescore, Total_SocialINF,
               float(Total_PATS) / POICount,
               float(Total_Timescore) / POICount,
               float(Total_SocialINF) / POICount,
               editdistance, reconstructionFlag, AllPOI, Total_KM)
        result.append(tup)
        qCount += 1
    result += reconstructionOutput
    timelist.append('Scoring:' + str(timer.time() - startTime))
    startTime = timer.time()
    result = [d for d in result if d is not None]

    # remove subsequence (disabled in the original)
    # POISequence = []
    # for i in result:
    #     if i is not None:
    #         POIs = i[20]
    #         Seq = ''
    #         for POI in POIs:
    #             Seq += POI['pid']
    #             Seq += ','
    #         POISequence.append(Seq)
    # seqIdx = 0
    # for seq in POISequence:
    #     for seqq in POISequence:
    #         if seq == seqq:
    #             continue
    #         if seq in seqq:
    #             result[seqIdx] = None
    #             break
    #     seqIdx += 1
    # timelist.append('Remove subseq:' + str(timer.time() - startTime))
    # startTime = timer.time()

    result_new = []
    # time constraint here!!!
    if stime == 'Anytime' and etime == 'Anytime':
        for i in result:
            if i is not None:
                POIs = i[20]
                nTuple = i + (POIs, )
                result_new.append(nTuple)
    elif stime == 'Anytime':
        endTime = int(etime.replace(':00', ''))
        for i in result:
            if i is not None:
                POIs = i[20]
                newPOIs = []
                for POI in POIs:
                    time = POI['time']
                    if int(time) <= endTime:
                        newPOIs.append(POI)
                if len(newPOIs) > 1:
                    result_new.append(i + (newPOIs, ))
    elif etime == 'Anytime':
        startTime = int(stime.replace(':00', ''))
        for i in result:
            if i is not None:
                POIs = i[20]
                newPOIs = []
                for POI in POIs:
                    time = POI['time']
                    if int(time) >= startTime:
                        newPOIs.append(POI)
                if len(newPOIs) > 1:
                    result_new.append(i + (newPOIs, ))
    else:
        startTime = int(stime.replace(':00', ''))
        endTime = int(etime.replace(':00', ''))
        for i in result:
            if i is not None:
                POIs = i[20]
                newPOIs = []
                for POI in POIs:
                    time = POI['time']
                    if int(time) >= startTime and int(time) <= endTime:
                        newPOIs.append(POI)
                if len(newPOIs) > 1:
                    result_new.append(i + (newPOIs, ))
    timelist.append('Time:' + str(timer.time() - startTime))
    startTime = timer.time()

    skylineInputDict = {}
    skylineInputValue = []
    skylineInputDict_avg = {}
    skylineInputValue_avg = []
    idxCount = 0
    for t in result_new:
        rid = t[2]
        ScoringTime += t[10]
        ProcessTime += t[11]
        # skylineT = (t[12], t[13], t[14])
        skylineT_avg = (t[15], t[16], t[17], t[21])
        skylineInputDict_avg[(rid, skylineT_avg)] = idxCount
        skylineInputValue_avg.append((rid, skylineT_avg))
        idxCount += 1

    # The original called _pool.map(cal_dominate, itertools.izip(...)) here
    # while leaving both _pool and cal_dominate commented out; the equivalent
    # sequential skyline pass (also present in the original as a comment) is
    # used instead so the function actually runs.
    r = []
    for i in skylineInputValue_avg:
        all_dominate = True
        for j in skylineInputValue_avg:
            if if_dominate(i[1], j[1]) == False:
                all_dominate = False
                break
        if all_dominate:
            r.append(i)
    resultData_avg = [result_new[skylineInputDict_avg[i]] for i in r]
    timelist.append('Skyline:' + str(timer.time() - startTime))
    startTime = timer.time()

    # sorting by PATS
    sorted_by_PATS = sorted(result_new, reverse=True,
                            key=lambda tup: tup[12])[:len(resultData_avg)]
    # sorting by timescore
    sorted_by_timescore = sorted(result_new, reverse=True,
                                 key=lambda tup: tup[13])[:len(resultData_avg)]
    # sorting by socialINF
    sorted_by_socialINF = sorted(result_new, reverse=True,
                                 key=lambda tup: tup[14])[:len(resultData_avg)]
    # sorting by KM
    sorted_by_KM = sorted(result_new, reverse=True,
                          key=lambda tup: tup[21])[:len(resultData_avg)]

    resultPOI_skyline = [i[22] for i in resultData_avg]
    resultPOI_skyline_ori = [i[22] for i in resultData_avg
                             if 'Reconstruct_' not in str(i[2])]
    resultPOI_skyline_re = [i[22] for i in resultData_avg
                            if 'Reconstruct_' in str(i[2])]
    resultPOI_PATS = [i[22] for i in sorted_by_PATS]
    resultPOI_timescore = [i[22] for i in sorted_by_timescore]
    resultPOI_socialINF = [i[22] for i in sorted_by_socialINF]
    resultPOI_KM = [i[22] for i in sorted_by_KM]
    timelist.append('Sort:' + str(timer.time() - startTime))
    startTime = timer.time()

    return (resultPOI_skyline, resultPOI_PATS, resultPOI_timescore,
            resultPOI_socialINF, resultPOI_skyline_ori, resultPOI_skyline_re,
            resultPOI_KM)
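# Hedged invocation sketch (coordinates, uid, and keywords are hypothetical;
# the module-level dictionaries must already be loaded):
# skyline, pats, times, social, sky_ori, sky_re, km = core3(
#     121.50, 25.02, 121.57, 25.06, '100001', 'Food,Museum',
#     'Anytime', 'Anytime')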
import numpy
from sets import Set

# NOTE: findEnds, findInEdges, and simplifyRule are helpers defined elsewhere
# in this module.


def compareIndividualsNodeWise(truthList, testList, model1s, model2s, covs,
                               equivs):
    modeler = model1s[0]
    SDs = [0. for q in truthList]
    nodeSDs = []
    edgeSens, inDegrees, edgePPVs = [], [], []
    inCoV = []
    TPsum, TNsum, FPsum, FNsum = 0, 0, 0, 0
    for node in range(0, len(modeler.nodeList)):
        tempSD = 0.
        FP, TP, TN, FN = 0, 0, 0, 0
        # simplify rules at the node and find the edge-wise PPV, sens, and SDs
        inCovTemper = []
        for k in range(len(truthList)):
            inCovtemp = []
            # find start and end of this node in each model
            start1, end1 = findEnds(model1s[k], node, truthList[k])
            start2, end2 = findEnds(model2s[k], node, testList[k])
            # find the shadow and nodes for each model
            truthInEdges = findInEdges(model1s[k], node)
            testInEdges = findInEdges(model2s[k], node)
            # find the bitstring for just this node
            truth = truthList[k][start1:end1]
            test = testList[k][start2:end2]
            # simplify ground truth and recovered rules
            truth = simplifyRule(truth, truthInEdges)
            test = simplifyRule(test, testInEdges)
            # edit overall rule list with simplified rules
            testList[k][start2:end2] = test
            truthList[k][start1:end1] = truth
            # find SD, PPV, etc.
            truthSet = Set([])  # edges in correct rule
            testSet = Set([])   # edges in rule found
            baseSet = Set([])   # edges possible across all rules
            # find edges in the true rule (and edges possible), and the
            # average incoming coefficient of variation
            for i in range(0, len(truth)):
                if truth[i] == 1:
                    for nodeToAdd in model1s[k].andNodeList[node][i]:
                        truthSet.add(nodeToAdd)
                    inCovtemp.append(covs[k][node])
                for nodeToAdd in model1s[k].andNodeList[node][i]:
                    baseSet.add(nodeToAdd)
            # find edges in the test (recovered) rule
            for i in range(0, len(test)):
                if test[i] == 1:
                    for nodeToAdd in model2s[k].andNodeList[node][i]:
                        testSet.add(nodeToAdd)
            # find structural distance at this node
            SDs[k] = SDs[k] + len(truthSet.difference(testSet)) + \
                len(testSet.difference(truthSet))
            tempSD = tempSD + len(truthSet.difference(testSet)) + \
                len(testSet.difference(truthSet))
            # save edge-wise statistics for this node
            FP += 1. * len(testSet.difference(truthSet))
            TP += 1. * len(testSet.intersection(truthSet))
            FN += 1. * len(truthSet.difference(testSet))
            inCovTemper.append(numpy.mean(inCovtemp))
        # calculate and save overall edge-wise statistics
        if (TP + FN) > 0:
            sensitivity = 1. * TP / (TP + FN)
        else:
            sensitivity = 100
        if TP + FP > 0:
            PPV = 1. * TP / (TP + FP)
        else:
            PPV = 100
        nodeSDs.append(tempSD / len(truthList))
        edgeSens.append(sensitivity)
        edgePPVs.append(PPV)
        TPsum += TP
        FNsum += FN
        FPsum += FP
        inDegrees.append(len(baseSet))
        inCoV.append(numpy.mean(inCovTemper))
    if (TPsum + FNsum) > 0:
        edgeSens = 1. * TPsum / (TPsum + FNsum)
    else:
        edgeSens = 100
    if (FPsum + TPsum) > 0:
        edgePPV = 1. * TPsum / (FPsum + TPsum)
    else:
        edgePPV = 100
    nodeSens = []   # sensitivity by node
    nodePPV = []    # PPV by node
    nodeRTs = []    # rules true by node
    nodePsens = []
    nodepPPV = []
    # gives a node list (should be the same across all trials in a network)
    nodelister = model1s[0].nodeList
    sampleRTs = [[] for item in truthList]    # rules true for each trial
    samplePPVs = [[] for item in truthList]   # PPV for each trial
    sampleSenss = [[] for item in truthList]  # sens for each trial
    # RT sensitivity of equivalents for each trial
    equivRTsens = [[] for item in truthList]
    # sensitivity for equivalents for each trial
    equivSens = [[] for item in truthList]
    equivNodeRTsens = []
    equivNodeSens = []
    # iterate over all nodes in the network
    for node in range(len(nodelister)):
        rtTemp = []        # stores rules true for this node across all networks
        ones = []          # stores the number of false negative and-rules
        zeros = []         # stores the number of correct and-rules
        negones = []       # stores the number of false positive and-rules
        equivOnes = []     # stores the min number of false negatives across equivs
        equivZeros = []    # stores the max correct across equivs
        equivNegOnes = []  # stores the min false positives across equivs
        sumindividual = [] # total number of true positive and-rules
        equivRTsensNode = []
        equivSensNode = []
        # loop over the individuals provided and calculate sens, PPV, rules true
        for i in range(len(truthList)):
            # find start and end of this node in each model
            start1, end1 = findEnds(model1s[i], node, truthList[i])
            start2, end2 = findEnds(model2s[i], node, testList[i])
            # find the values for just this node
            truth = truthList[i][start1:end1]
            test = testList[i][start2:end2]
            # set up empty lists for ands, edges, and the shadow and nodes
            # associated with this node in each model
            truthAnds = []
            testAnds = []
            # get the set of all shadow and nodes actually used in each rule
            for j in range(len(model1s[i].andNodeList[node])):
                if truth[j] > 0:
                    truthAnds.append(tuple(model1s[i].andNodeList[node][j]))
            for j in range(len(model2s[i].andNodeList[node])):
                if test[j] > 0:
                    testAnds.append(tuple(model2s[i].andNodeList[node][j]))
            truthAnd = set(tuple(truthAnds))
            testAnd = set(tuple(testAnds))
            # get the set of all shadow and nodes used in each equivalent rule
            equivAnds = []
            for test1 in equivs[i][node]:
                tempEquivAnd = []
                for j in range(len(model2s[i].andNodeList[node])):
                    if test1[j] > 0:
                        tempEquivAnd.append(
                            tuple(model2s[i].andNodeList[node][j]))
                testAnd1 = set(tuple(tempEquivAnd))
                equivAnds.append(testAnd1)
            RTequiv = 0.
            possibilityOnes = []
            possibilityZeros = []
            possibilityNegOnes = []
            # (assumes equivs[i][node] is non-empty, as in the original;
            # the max/min calls below fail otherwise)
            for testAnder1 in equivAnds:
                if (truthAnd == testAnder1):
                    RTequiv = 1.
                possibilityOnes.append(len(truthAnd.difference(testAnd)))
                possibilityZeros.append(len(truthAnd.intersection(testAnd)))
                possibilityNegOnes.append(len(testAnd.difference(truthAnd)))
            # append results for this trial to all results
            maxPossibilityZeros = max(possibilityZeros)
            minPossibilityOnes = min(possibilityOnes)
            minPossibilityNegOnes = min(possibilityNegOnes)
            equivOnes.append(minPossibilityOnes)
            equivZeros.append(maxPossibilityZeros)
            equivNegOnes.append(minPossibilityNegOnes)
            equivRTsensNode.append(RTequiv)
            equivRTsens[i].append(RTequiv)
            # calculate true positives, false positives, false negatives, and
            # total slots for this node and trial, then save
            onetemp = len(truthAnd.difference(testAnd))
            zerotemp = len(truthAnd.intersection(testAnd))
            negonetemp = len(testAnd.difference(truthAnd))
            sumindtemp = len(truthAnd)
            ones.append(onetemp)
            zeros.append(zerotemp)
            negones.append(negonetemp)
            sumindividual.append(sumindtemp)
            # add rules true, first sample-wise then node-wise
            if len(model1s[i].andNodeList[node]) > 1:
                if (truthAnd == testAnd):
                    sampleRTs[i].append(1.)
                else:
                    sampleRTs[i].append(0.)
                if (sumindtemp - onetemp + negonetemp) > 0:
                    samplePPVs[i].append(1. * (sumindtemp - onetemp) /
                                         (sumindtemp - onetemp + negonetemp))
                else:
                    samplePPVs[i].append(100)
                if (sumindividual[i]) > 0:
                    sampleSenss[i].append(1. * (sumindtemp - onetemp) /
                                          (sumindtemp))
                else:
                    sampleSenss[i].append(100)
                if (truthAnd == testAnd):
                    rtTemp.append(1.)
                else:
                    rtTemp.append(0.)
        nodeRTs.append(numpy.mean(rtTemp))  # node-wise rules true added
        equivNodeRTsens.append(numpy.mean(equivRTsensNode))
        # calculate sensitivity for the node
        temp = [100 if sumindividual[i] == 0 else
                1. * (sumindividual[i] - ones[i]) / (sumindividual[i])
                for i in range(0, len(ones))]
        temp = filter(lambda a: a != 100, temp)
        if len(temp) == 0:
            sensitivity = 100
        else:
            sensitivity = (1. * numpy.sum(temp) / len(temp))
        # calculate max sensitivity for the node
        temp = [100 if sumindividual[i] == 0 else
                1. * (sumindividual[i] - equivOnes[i]) / (sumindividual[i])
                for i in range(0, len(equivOnes))]
        temp = filter(lambda a: a != 100, temp)
        if len(temp) == 0:
            psensitivity = 100
        else:
            psensitivity = (1. * numpy.sum(temp) / len(temp))
        nodePsens.append(psensitivity)
        # calculate PPV for the node
        temp = [100 if (sumindividual[i] - ones[i] + negones[i]) == 0 else
                1. * (sumindividual[i] - ones[i]) /
                (sumindividual[i] - ones[i] + negones[i])
                for i in range(0, len(ones))]
        temp = filter(lambda a: a != 100, temp)
        if len(temp) == 0:
            PPV = 100
        else:
            PPV = (1. * numpy.sum(temp) / len(temp))
        # calculate equivalent-rule PPV for the node
        temp = [100 if (sumindividual[i] - equivOnes[i] + equivNegOnes[i]) == 0
                else 1. * (sumindividual[i] - equivOnes[i]) /
                (sumindividual[i] - equivOnes[i] + equivNegOnes[i])
                for i in range(0, len(equivOnes))]
        temp = filter(lambda a: a != 100, temp)
        if len(temp) == 0:
            pPPV = 100
        else:
            pPPV = (1. * numpy.sum(temp) / len(temp))
        nodepPPV.append(pPPV)
        # add to the lists of sensitivity and PPV by node
        nodeSens.append(sensitivity)
        nodePPV.append(PPV)
    sampleEquivRT = [1. * numpy.mean(filter(lambda a: a != 100, sampler))
                     for sampler in equivRTsens]  # rules true for each trial
    sampleRT = [1. * numpy.mean(filter(lambda a: a != 100, sampler))
                for sampler in sampleRTs]         # rules true for each trial
    samplePPV = [1. * numpy.mean(filter(lambda a: a != 100, sampler))
                 for sampler in samplePPVs]       # PPV for each trial
    sampleSens = [1. * numpy.mean(filter(lambda a: a != 100, sampler))
                  for sampler in sampleSenss]     # sens for each trial
    return (sampleEquivRT, equivNodeRTsens, nodePsens, nodepPPV, sampleSens,
            samplePPV, nodeSens, nodePPV, sampleRT, nodeRTs, edgeSens,
            edgePPV, SDs, nodeSDs, len(modeler.nodeList), inDegrees, inCoV)
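# A note on the 100 sentinel used throughout compareIndividualsNodeWise (a
# hedged reading of the code): metrics that are undefined for a node or trial
# (zero denominator) are stored as 100 and stripped with
# filter(lambda a: a != 100, ...) before averaging, so undefined cases do not
# bias the per-node and per-trial means.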
    def write_data_matrix(self, data_matrix, output_fname, strain_id_list,
                          snp_id_list, snp_id2acc, with_header_line,
                          nt_alphabet, strain_id2acc=None,
                          strain_id2category=None,
                          rows_to_be_tossed_out=Set(),
                          strain_id2other_info=None, discard_all_NA_strain=0,
                          predefined_header_row=[
                              'strain', 'duplicate', 'latitude', 'longitude',
                              'nativename', 'stockparent', 'site', 'country'
                          ]):
        """
        2008-05-08
            defunct, use write_data_matrix from pymodule
        2007-02-19
            if strain_id2acc is available, translate strain_id into
            strain_acc; if strain_id2category is available, add 'category'
        2007-02-25
            if one strain's SNP row is all NA, it'll be skipped
        2007-02-25
            add argument rows_to_be_tossed_out
        2007-09-23
            add discard_all_NA_strain
        2007-10-22
            add no_of_all_NA_rows
        2007-12-13
            add predefined_header_row
        2007-12-16
            add 'duplicate' into predefined_header_row
        """
        sys.stderr.write("Writing data_matrix ...")
        no_of_all_NA_rows = 0
        writer = csv.writer(open(output_fname, 'w'), delimiter='\t')
        if with_header_line:
            header_row = [predefined_header_row[0]]
            if strain_id2category:
                header_row.append(predefined_header_row[1])
            if strain_id2other_info:
                no_of_fields = len(strain_id2other_info.values()[0])  # 2007-12-13
                for i in range(no_of_fields):
                    header_row.append(predefined_header_row[2 + i])
            for snp_id in snp_id_list:
                header_row.append(snp_id2acc[snp_id])
            writer.writerow(header_row)
        for i in range(len(data_matrix)):
            if strain_id2acc:
                new_row = [strain_id2acc[strain_id_list[i]]]
            else:
                new_row = [strain_id_list[i]]
            if strain_id2category:
                new_row.append(strain_id2category[strain_id_list[i]])
            if strain_id2other_info:
                new_row += strain_id2other_info[strain_id_list[i]]
            if discard_all_NA_strain and \
                    sum(data_matrix[i] == 0) == data_matrix.shape[1]:
                no_of_all_NA_rows += 1
                continue
            elif i not in rows_to_be_tossed_out:  # 2007-02-25
                for j in data_matrix[i]:
                    if nt_alphabet:
                        j = number2nt[j]
                    new_row.append(j)
                writer.writerow(new_row)
        del writer
        sys.stderr.write("%s all NA rows ." % no_of_all_NA_rows)
        sys.stderr.write("Done.\n")
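        # Hedged usage sketch (argument values are hypothetical, and the
        # docstring above marks this method defunct in favor of pymodule's
        # write_data_matrix):
        # self.write_data_matrix(data_matrix, 'output.tsv', strain_id_list,
        #                        snp_id_list, snp_id2acc, with_header_line=1,
        #                        nt_alphabet=1)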
        self.href = Href(str(item.uri))
        # The original called str() before the None check, so the check could
        # never fire; test the raw target first, then stringify.
        subject = model.get_target(item, dc.subject)
        self.tags = Set([])
        if subject is not None:
            self.tags = Set([Tag(x) for x in str(subject).split(" ")])
        if tag is not None:
            self.tags.add(tag)


if __name__ == '__main__':
    import sys
    from sets import Set
    username = sys.argv[1]
    user = User(username)
    tags = Set()
    users = {}
    print("Reading " + username + " posts...")
    for post in user:
        for tag in post.tags:
            tags.add(tag)
        other_tags = Set()
        count = 0
        for other_post in post.href:
            u = other_post.user
            if not u == user:
                count += 1
                if u not in users:
                    users[u] = []
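                # Hedged continuation (the original snippet ends right after
                # initializing users[u]): record the overlapping post.
                users[u].append(other_post)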
import re
import operator
import os
import pprint
from itertools import islice

from json_utils import load_json
from sets import Set


def take(n, iterable):
    return list(islice(iterable, n))


allowed_chars = Set(
    '0123456789abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ_')
master_count = {}


def add_hit(channel, hit_type, hit):
    if not master_count.get(channel):
        master_count[channel] = {
            'emojis': {},
            'emojis_reactions': {},
        }
    if not master_count.get(channel).get(hit_type).get(hit):
        master_count[channel][hit_type][hit] = 0
    master_count[channel][hit_type][hit] += 1


def filter_emojis(text):
    return Set(text.replace(':', '')).issubset(allowed_chars) and len(text) > 2
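# Hedged usage sketch (channel and emoji names invented for illustration):
# tally one custom emoji if its name passes the character filter.
if filter_emojis(':party_parrot:'):
    add_hit('#general', 'emojis', 'party_parrot')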
import datetime
import random
from sets import Set

# NOTE: this function also relies on module-level state defined elsewhere:
# nr_words_to_follow, log_file_n, nr_trials_check, nr_trials_re_init,
# neighbor_range_swap, to_file_trials, global_index, grid_f,
# stop_condition(), init_closest(), and stats_to_file().


def puzzle(grid_size, old_grid_size, nr_words):
    print "NR WORDS", nr_words
    iter = 0
    png_nr = 0
    # log_file = open(output_directory + r"\log.txt", "w")

    # sample words to follow
    follow_inds = Set()
    while len(follow_inds) < nr_words_to_follow:
        follow_inds.add(random.randrange(nr_words))
    follow_inds = list(follow_inds)
    # log_file.write("Indexes:")
    # for key, value in global_index.iteritems():
    #     log_file.write(str(key) + " " + value.name + " " + str(value.id) + "\n")

    print "\n========\nSTART PUZZLING\n===========\n"
    log_file = open(log_file_n, 'a')
    log_file.write("\n\n========\nSTART PUZZLING\n===========\n\n")
    log_file.close()

    trial_nr = 0
    nr_inits = 0
    elem_indexes = range(nr_words)
    grid_size = int(grid_size)
    while not stop_condition(trial_nr):
        for i in range(nr_trials_check):
            if trial_nr % 5 == 0:
                print "\nTRIAL", trial_nr, datetime.datetime.now()
                log_file = open(log_file_n, 'a')
                log_file.write("TRIAL " + str(trial_nr) + " " +
                               str(datetime.datetime.now()) + "\n")
                log_file.close()
            # check if you need to reinitialize
            if iter % (nr_words * nr_trials_re_init) == 0 or iter == 0:
                nr_inits += 1
                print "\ninit closest trial", trial_nr, " start:", datetime.datetime.now()
                log_file = open(log_file_n, 'a')
                log_file.write("init closest at " +
                               str(datetime.datetime.now()) + "\n")
                log_file.close()
                init_closest(grid_size, old_grid_size, iter == 0)
                print "init closest stop:", datetime.datetime.now()
                log_file = open(log_file_n, 'a')
                log_file.write("stop init closest at " +
                               str(datetime.datetime.now()) + "\n")
                log_file.close()
                # log_file.write("INITIALIZED\n\n")
                # print_all_lists(str(trial_nr))
            if iter == 0:
                stats_to_file("FIRST", trial_nr, follow_inds, nr_inits,
                              grid_size, png_nr)
                png_nr += 1
            # pick elements in random order
            random.shuffle(elem_indexes)
            for elem_i in elem_indexes:
                [x, y] = list(global_index[elem_i].pos)
                if iter % 5000 == 0:
                    print "iter", iter,
                swap_value = float("-inf")
                # check with which neighbor it wants to swap
                for dx in range(neighbor_range_swap[0], neighbor_range_swap[1]):
                    for dy in range(neighbor_range_swap[0], neighbor_range_swap[1]):
                        if x + dx >= 0 and x + dx < grid_size and \
                                y + dy >= 0 and y + dy < grid_size:
                            # the swap gain counts both elements when the
                            # target cell is occupied, else just the mover's
                            if grid_f[x + dx][y + dy] != None:
                                v = grid_f[x][y].get_improvement(x + dx, y + dy) + \
                                    grid_f[x + dx][y + dy].get_improvement(x, y)
                            else:
                                v = grid_f[x][y].get_improvement(x + dx, y + dy)
                            if v > swap_value:
                                swap_value = v
                                swap_x = x + dx
                                swap_y = y + dy
                if swap_value > 0:
                    xy = grid_f[x][y]
                    xy_swap = grid_f[swap_x][swap_y]
                    grid_f[x][y] = xy_swap
                    grid_f[swap_x][swap_y] = xy
                    xy.change_pos(swap_x, swap_y)
                    if xy_swap != None:
                        xy_swap.change_pos(x, y)
                elif swap_value == float("-inf"):
                    print "-inf"
                iter += 1
            # figures and stats to file
            if trial_nr % to_file_trials == 0 and trial_nr != 0:
                stats_to_file(iter, trial_nr, follow_inds, nr_inits,
                              grid_size, png_nr)
                png_nr += 1
            trial_nr += 1
    stats_to_file("LAST", trial_nr, follow_inds, nr_inits, grid_size, png_nr)
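# A hedged reading of the swap criterion above: for each word at (x, y) the
# loop scores every in-range neighbor cell as
#     v = improvement(word -> cell) + improvement(occupant -> (x, y))
# and performs the best swap only when v > 0, i.e. when relocating strictly
# improves the combined placement score.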
# (The first statement of this excerpt is cut off upstream; the trailing
# fragment is kept as found.)
#     ... hg.interval_list([i[0]]).intersection(amplist) +
#         hg.interval_list([i[0]]).intersection(rdList)) > 0])
rdList = hg.interval_list([
    hg.interval(i.chrom, max(0, i.start - 10000),
                min(i.end + 10000, hg.chrLen[hg.chrNum(i.chrom)]))
    for i in rdList
])
iout = open(outName + '.integration_search.out', 'w')
iout.write(mystdout.getvalue())
iout.close()
sys.stdout = old_stdout
irdhops = []
irddict = {}
irdSets = Set([Set([ird]) for ird in rdList])
irdgroupdict = {ird: Set([ird]) for ird in rdList}
if args.extendmode == 'EXPLORE' or args.extendmode == 'VIRAL':
    for ird in rdList:
        logging.info("#TIME " + '%.3f\t' % (time() - TSTART) +
                     "Exploring interval: " + str(ird))
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()
        ilist = bamFileb2b.interval_hops(ird)
        irdhops.append((ird, ilist))
        for i in ilist:
            irddict[i] = ird
        iout = open(outName + '.' + ird.chrom + ":" + str(ird.start) + '-' +
                    str(ird.end) + '.out', 'w')
        iout.write(mystdout.getvalue())
def check_one_file(a, d1, dx1, FS, threshold, file_input, view_strings=False,
                   new=True, library=True):
    d2 = None
    ret_type = androconf.is_android(file_input)
    if ret_type == "APK":
        a = apk.APK(file_input)
        d2 = dvm.DalvikVMFormat(a.get_dex())
    elif ret_type == "DEX":
        d2 = dvm.DalvikVMFormat(read(file_input))

    if d2 == None:
        return

    dx2 = analysis.VMAnalysis(d2)
    el = elsim.Elsim(ProxyDalvik(d1, dx1), ProxyDalvik(d2, dx2), FS,
                     threshold, options.compressor, libnative=library)
    el.show()

    print "\t--> methods: %f%% of similarities" % el.get_similarity_value(new)

    if options.dump:
        print '\nDumping smali code...'
        tmp1 = options.input[1].split('/')
        jarname = tmp1[len(tmp1) - 1]
        if not os.path.exists('smali'):
            os.makedirs('smali')
        os.system('apktool d ' + options.input[1])
        if jarname[len(jarname) - 4:len(jarname)] == '.apk':
            os.system('mv -f ' + jarname[0:len(jarname) - 4] + ' smali')
        else:
            os.system('mv -f ' + jarname + '.out ' + 'smali')

        # collect the classes touched by similar and new methods
        classes = Set([])
        diff_methods = el.get_similar_elements()
        for i in diff_methods:
            x = el.show_similar_class_name(i)
            for j in range(0, len(x)):
                classes.add(x.pop())
        new_methods = el.get_new_elements()
        for i in new_methods:
            y = el.show_new_class_name(i)
            classes.add(y)

        if not os.path.exists('codedump'):
            os.makedirs('codedump')
        os.chdir('codedump')
        if os.path.exists(jarname):
            os.system('rm -rf ' + jarname)
        os.makedirs(jarname)
        os.chdir('..')
        for i in range(0, len(classes)):
            # os.makedirs('codedump/' + jarname)
            filepath = classes.pop()
            filename = filepath.replace('/', '.')
            shutil.copy2('smali/' + jarname + '.out/smali/' + filepath,
                         'codedump/' + jarname + '/' + filename)
        os.system('rmdir codedump/' + jarname)

        # collect the individual similar and new method names
        classes1 = Set([])
        for i in diff_methods:
            x = el.show_similar_method_name(i)
            for j in range(0, len(x)):
                classes1.add(x.pop())
        for i in new_methods:
            y = el.show_new_method_name(i)
            classes1.add(y)

        start = ''
        end = '.end method'
        if not os.path.exists('methoddump'):
            os.makedirs('methoddump')
        for i in range(0, len(classes1)):
            x1 = classes1.pop()
            xx = x1.split(' ', 1)
            if not os.path.exists('methoddump/' + jarname):
                os.makedirs('methoddump/' + jarname)
            with open('codedump/' + jarname + '/' + xx[0]) as infile:
                for line in infile:
                    if xx[1] in line:
                        start = line.replace('\n', '')
                        break
            med = xx[1].split('(', 1)[0]
            with open('codedump/' + jarname + '/' + xx[0]) as infile, \
                    open('methoddump/' + jarname + '/' + xx[0] + '.' + med +
                         '.method', 'w+') as outfile:
                copy = False
                outfile.write(start + '\n')
                for line1 in infile:
                    if line1.strip() == start:
                        copy = True
                    elif line1.strip() == end:
                        copy = False
                    elif copy:
                        outfile.write(line1)
                outfile.write(end)
        print 'DUMP SMALI CODE SUCCESSFULLY.'

    if options.display:
        print "SIMILAR methods:"
        diff_methods = el.get_similar_elements()
        for i in diff_methods:
            el.show_element(i)
        print "IDENTICAL methods:"
        new_methods = el.get_identical_elements()
        for i in new_methods:
            el.show_element(i)
        print "NEW methods:"
        new_methods = el.get_new_elements()
        for i in new_methods:
            el.show_element(i, False)
        print "DELETED methods:"
        del_methods = el.get_deleted_elements()
        for i in del_methods:
            el.show_element(i)
        print "SKIPPED methods:"
        skipped_methods = el.get_skipped_elements()
        for i in skipped_methods:
            el.show_element(i)

    if view_strings:
        els = elsim.Elsim(ProxyDalvikStringMultiple(d1, dx1),
                          ProxyDalvikStringMultiple(d2, dx2),
                          FILTERS_DALVIK_SIM_STRING, threshold,
                          options.compressor, libnative=library)
        # els = elsim.Elsim(ProxyDalvikStringOne(d1, dx1),
        #                   ProxyDalvikStringOne(d2, dx2),
        #                   FILTERS_DALVIK_SIM_STRING, threshold,
        #                   options.compressor, libnative=library)
        els.show()
        print "\t--> strings: %f%% of similarities" % els.get_similarity_value(new)

        if options.display:
            print "SIMILAR strings:"
            diff_strings = els.get_similar_elements()
            for i in diff_strings:
                els.show_element(i)
            print "IDENTICAL strings:"
            new_strings = els.get_identical_elements()
            for i in new_strings:
                els.show_element(i)
            print "NEW strings:"
            new_strings = els.get_new_elements()
            for i in new_strings:
                els.show_element(i, False)
            print "DELETED strings:"
            del_strings = els.get_deleted_elements()
            for i in del_strings:
                els.show_element(i)
            print "SKIPPED strings:"
            skipped_strings = els.get_skipped_elements()
            for i in skipped_strings:
                els.show_element(i)
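# Hedged invocation sketch (argument values are hypothetical; d1/dx1 would
# come from loading the reference APK or DEX earlier in the script, and the
# filter constant is an assumption mirroring FILTERS_DALVIK_SIM_STRING above):
# check_one_file(None, d1, dx1, FILTERS_DALVIK_SIM, 60, 'other.apk',
#                view_strings=True)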
from math import sqrt
from sets import Set


def sod(n):
    # sum of proper divisors of n; the original looped over
    # range(2, int(sqrt(n))), which skips the divisor pair at
    # i == int(sqrt(n)) for non-squares (e.g. it missed 4 for n = 12)
    total = 1
    i = 2
    while i * i < n:
        if n % i == 0:
            total += i + n / i
        i += 1
    if i * i == n:
        total += i
    return total


l = [0 for i in range(10000)]
ans = []
for i in range(1, 10000):
    l[i] = sod(i)
    # an amicable pair (a, b) with b = sod(a) < a satisfies sod(b) == a
    if l[i] < i and l[l[i]] == i:
        ans.append(i)
        ans.append(l[i])

ans = Set(ans)
print ans
x = sum(ans)
print x
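# Quick hedged sanity check (not part of the original): 220/284 is the
# classic amicable pair, so both should appear in the result set.
assert 220 in ans and 284 in ans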
    def get_years(self):
        years = Set()
        incidents = Incident.objects.all()
        for inc in incidents:
            years.add(inc.year)
        return years
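    # A hedged alternative (assuming Incident.objects is a Django queryset,
    # as the .objects.all() above suggests): let the database deduplicate
    # instead of pulling every Incident into Python.
    # def get_years(self):
    #     return Set(Incident.objects.values_list('year', flat=True).distinct())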
import os
from sets import Set


def deaccent(s):
    # fold Greek tonos accents and final sigma to bare letters
    return s \
        .replace(u'ά', u'α') \
        .replace(u'έ', u'ε') \
        .replace(u'ή', u'η') \
        .replace(u'ί', u'ι') \
        .replace(u'ό', u'ο') \
        .replace(u'ύ', u'υ') \
        .replace(u'ώ', u'ω') \
        .replace(u'ς', u'σ')


crawlerdir = os.environ['CRAWLERDIR']


def load_word_set(filename):
    # The original repeated this read-normalize-add loop verbatim for each
    # word list; it is factored into a helper with identical behavior.
    words = Set()
    with open(crawlerdir + "greekdata/" + filename, "r") as f:
        for line in f:
            words.add(deaccent(unicode(line, 'utf-8').strip().lower()))
    return words


expletives = load_word_set("expletives")
articles = load_word_set("articles")
pronouns = load_word_set("pronouns")
locations = load_word_set("locations")
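# Usage example for deaccent, following directly from the replacements above:
assert deaccent(u'ής') == u'ησ'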
def feat_extr_ngram(row_id_str, hdfs_dir_list, hdfs_feat_dir, model_data_folder , sp_master, spark_rdd_compress, spark_driver_maxResultSize, sp_exe_memory, sp_core_max , zipout_dir, zipcode_dir, zip_file_name , mongo_tuples, fromweb, label_arr, metadata_count,label_idx,data_idx, pattern_str, ln_delimitor, data_field_list, jkey_dict , jobname, num_gram, feature_count_threshold, token_dict=None, HDFS_RETR_DIR=None, remove_duplicated="N" , cust_featuring=None, cust_featuring_params=None, local_out_dir=None, filter_ratio=None ): # zip func in other files for Spark workers ================= ================ zip_file_path=ml_util.ml_build_zip_file(zipout_dir, zipcode_dir, zip_file_name, user_custom=cust_featuring) # get_spark_context sc=ml_util.ml_get_spark_context(sp_master , spark_rdd_compress , spark_driver_maxResultSize , sp_exe_memory , sp_core_max , jobname , [zip_file_path]) # log time ================================================================ ================ t0 = time() # input filename input_filename="*" ext_type='.gz' gz_list=None convert2dirty="N" if not ',' in hdfs_dir_list: # single dir having *.gz ==== ========= # read raw data from HDFS as .gz format ========== rdd_files=os.path.join(hdfs_dir_list, input_filename+ext_type) # check if gz files in hdfs ============ try: gz_list=hdfs.ls(hdfs_dir_list) print "INFO: check hdfs folder=",hdfs_dir_list except IOError as e: print "WARNING: I/O error({0}): {1}".format(e.errno, e.strerror) except: print "WARNING: Error at checking HDFS file:", sys.exc_info()[0] # use whole folder if gz_list is None or len(gz_list)==0: rdd_files=hdfs_dir_list print "ERROR: No file found by ",input_filename+ext_type #,", use",hdfs_dir_list,"instead" return -2 else: # multiple dirs ==== ========= rdd_files="" cnt=0 temp_lbl_list=[] comma="" print "INFO: before label_arr=",label_arr # check each folder for dr in hdfs_dir_list.split(','): print "****=",dr if not len(dr)>0: continue try: # remove space etc. 
dr=dr.strip() fdr=os.path.join(HDFS_RETR_DIR, dr) print "fdr=",fdr # ls didn't like "*" if '*' in fdr: #gz_list=hdfs.ls(fdr.replace("*","")) dn=os.path.dirname(fdr).strip() bn=os.path.basename(fdr).strip() #print "dn=",dn,",bn=",bn # get all names under folder and do filtering gz_list=fnmatch.filter(hdfs.ls(dn), '*'+bn) #print "gz_list=",gz_list else: gz_list=hdfs.ls(fdr) cnt=cnt+len(gz_list) if len(gz_list)>0: rdd_files=rdd_files+comma+fdr comma="," except IOError as e: print "WARNING: I/O error({0}): {1}".format(e.errno, e.strerror) except: print "WARNING: Error at checking HDFS file:", sys.exc_info()[0] # use whole folder if cnt is None or cnt==0: print "ERROR: No file found at",rdd_files return -2 else: print "INFO: total file count=",cnt # set convert flag only when multiple dir and label_arr has dirty label #if label_arr is None: # create label arr if None # label_arr=temp_lbl_list if not label_arr is None and len(label_arr)==2 and label_arr[1]=="dirty": convert2dirty="Y" print "INFO: rdd_files=",rdd_files txt_rdd=sc.textFile(rdd_files)#, use_unicode=False total_input_count=txt_rdd.count() print "INFO: Total input sample count=",total_input_count # debug only #for x in txt_rdd.collect(): # print "t=",x print "INFO: hdfs_dir_list=",hdfs_dir_list print "INFO: label_arr=",label_arr print "INFO: feature_count_threshold=",feature_count_threshold #jkey_dict={"meta_list":["label","md5","mdate"], "data_key":"logs"} # this dict depends on the format of input data if not data_field_list is None: jkey_dict=json.loads(jkey_dict) data_key=jkey_dict["data_key"] meta_list=jkey_dict["meta_list"] metadata_count=len(meta_list) data_idx=metadata_count print "INFO: jkey_dict=",jkey_dict print "INFO: meta_list=",meta_list print "INFO: data_key=",data_key print "INFO: data_field_list=",data_field_list print "INFO: metadata_count=",metadata_count featured_rdd = txt_rdd \ .map(lambda x: preprocess_json(x,meta_list,data_key,data_field_list)) \ .filter(lambda x: len(x) > metadata_count) \ .filter(lambda x: type(x[metadata_count]) is list) \ .map(lambda x: feature_extraction_ngram(x, data_idx, MAX_FEATURES, num_gram)) \ .filter(lambda x: len(x) > metadata_count) \ .filter(lambda x: type(x[metadata_count]) is dict) \ .filter(lambda x: type(x[metadata_count+1]) is dict) \ .filter(lambda x: len(x[metadata_count])> int(feature_count_threshold) ) \ .cache() #print "INFO: featured_rdd=" #for x in featured_rdd.collect(): # print "INFO: **** f=",x # user custom code for featuring ============================================= ========== # input txt_rdd format (string): each text row for each sample # output featured_rdd format (list):[meta-data1,meta-data2,..., hash_cnt_dic, hash_str_dic] elif not cust_featuring is None and len(cust_featuring)>0: user_module=None user_func=None user_func_dnn=None # load user module ======= try: modules = map(__import__, [CUSTOM_PREFIX+cust_featuring]) user_module=modules[0] user_func=getattr(user_module,CUSTOM_FUNC) except Exception as e: print "ERROR: user module error.", e.__doc__, e.message return -101 # prepare for dnn, output as feat in an array tmp_rdd = txt_rdd.map(lambda x: user_func(x, cust_featuring_params)) \ .filter(lambda x: len(x) > metadata_count) \ .filter(lambda x: type(x[metadata_count]) is list).cache() # for traditional ML, feat in a dict featured_rdd = tmp_rdd \ .map(lambda x: feature_extraction_ngram(x, data_idx, MAX_FEATURES, num_gram)) \ .filter(lambda x: len(x) > metadata_count) \ .filter(lambda x: type(x[metadata_count]) is dict) \ .filter(lambda x: 
type(x[metadata_count+1]) is dict) \ .filter(lambda x: len(x[metadata_count])> int(feature_count_threshold) ) \ .cache() all_hashes_cnt_dic=None all_hash_str_dic=None all_hashes_seq_dic = None else: print "INFO: pattern_str=",pattern_str+"<--" print "INFO: ln_delimitor=",ln_delimitor+"<--" print "INFO: label_idx=",label_idx print "INFO: data_idx=",data_idx print "INFO: metadata_count=",metadata_count print "INFO: filter_ratio=",filter_ratio # filter top and least percentage of feature if not filter_ratio is None and filter_ratio > 0 and filter_ratio <1: # check total count here before continue upper_cnt=total_input_count*(1-filter_ratio) lower_cnt=total_input_count*filter_ratio # set limit for lower bound. if total count is large, lower_cnt may exclude all features... # max lower count = min( MAX_FILTER_LOWER_CNT, total_input_count/100 ) if not MAX_FILTER_LOWER_CNT is None and lower_cnt > MAX_FILTER_LOWER_CNT: if MAX_FILTER_LOWER_CNT > total_input_count/100: lower_cnt=total_input_count/100 else: lower_cnt=MAX_FILTER_LOWER_CNT print "INFO: filtering by count, upper bound=",upper_cnt,",lower bound=",lower_cnt # find unique feature, count them, remove them if in highest and lowest % and then create a dict f_feat_set = Set (txt_rdd.map(lambda x:x.split(ln_delimitor)).flatMap(lambda x:Set(x[metadata_count:])) \ .map(lambda x:(x,1)).reduceByKey(lambda a, b: a + b) \ .filter(lambda x:x[1]<= upper_cnt and x[1]>= lower_cnt) \ .map(lambda x:x[0]).collect() ) print "INFO: f_feat_set len=",len(f_feat_set) broadcast_f_set = sc.broadcast(f_feat_set) #txt_rdd=txt_rdd.map(lambda x: filter_by_list(x, metadata_count,ln_delimitor, broadcast_f_list.value )) txt_rdd=txt_rdd.map(lambda x: x.split(ln_delimitor)) \ .map(lambda x: x[:metadata_count]+ [w for w in x[metadata_count:] if w and w in broadcast_f_set.value]) \ .map(lambda x: ln_delimitor.join(x)) # preprocess by pattern matching and then extract n-gram features #.encode('UTF8') # input txt_rdd format (string): meta-data1\tmeta-data2\t...\tdataline1\tdataline2\t...datalineN\n # output featured_rdd format (list):[meta-data1,meta-data2,..., hash_cnt_dic, hash_str_dic] # hash_cnt_dic: {hash,hash:count,...} hash_str_dic: {hash: 'str1',... } tmp_rdd = txt_rdd \ .map(lambda x: preprocess_pattern(x, metadata_count, pattern_str, ln_delimitor \ , label_idx, label_arr, convert2dirty )) \ .filter(lambda x: len(x) > metadata_count) \ .filter(lambda x: type(x[metadata_count]) is list) #.cache() memory issue... #tmp_rdd_count=tmp_rdd.count() #print "INFO: After preprocessing count=",tmp_rdd_count featured_rdd = tmp_rdd \ .map(lambda x: feature_extraction_ngram(x, data_idx, MAX_FEATURES, num_gram)) \ .filter(lambda x: len(x) > metadata_count) \ .filter(lambda x: type(x[metadata_count]) is dict) \ .filter(lambda x: type(x[metadata_count+1]) is dict) \ .filter(lambda x: len(x[metadata_count])> int(feature_count_threshold) ) \ .cache() #feat_rdd_count=featured_rdd.count() #print "INFO: After featuring count=",feat_rdd_count all_hashes_cnt_dic=None all_hash_str_dic=None all_hashes_seq_dic = None #get all hashes and total occurring count =============== # all_hashes_cnt_dic: {'hash,hash': total count,... } if all_hashes_cnt_dic is None: #all_hashes_cnt_dic = featured_rdd.map(lambda x: x[metadata_count]).reduce(lambda a, b: combine_dic_cnt(a, b)) all_hashes_cnt_dic = dict(featured_rdd.flatMap(lambda x: x[metadata_count].items()).reduceByKey(lambda a, b: a + b).collect()) #get all hashes and their extracted string =============== # all_hash_str_dic: {hash:'str1', ... 
if all_hash_str_dic is None: #all_hash_str_dic = featured_rdd.map(lambda x: x[metadata_count+1]).reduce(lambda a, b: combine_dic(a, b)) all_hash_str_dic = dict(featured_rdd.flatMap(lambda x: x[metadata_count+1].items()).distinct().collect()) # get all labels into an array =============== provided by parameter? if label_arr is None: # will force "clean" be 0 here label_arr=sorted(featured_rdd.map(lambda x: x[label_idx].lower()).distinct().collect()) # debug only print "INFO: label_arr=",json.dumps(sorted(label_arr)) # save labels to hdfs as text file==================================== ============ hdfs_folder = hdfs_feat_dir #+ "/" # "/" is needed to create the folder correctly print "INFO: hdfs_folder=", hdfs_folder try: hdfs.mkdir(hdfs_folder) except IOError as e: print "WARNING: I/O error({0}): {1}".format(e.errno, e.strerror) except: print "WARNING: Unexpected error at mkdir:", sys.exc_info()[0] # clean up metadata_file metadata_file = os.path.join(hdfs_folder , metadata) #"metadata" print "INFO: metadata_file=", metadata_file try: hdfs.rmr(metadata_file) except IOError as e: print "WARNING: I/O error({0}): {1}".format(e.errno, e.strerror) except: print "WARNING: Unexpected error at rmr():", sys.exc_info()[0] sc.parallelize(label_arr,1).saveAsTextFile(metadata_file) #remap all hash values to continuous key/feature number ============== # all_hashes_seq_dic: { hash : sequential_numb } if all_hashes_seq_dic is None: all_hashes_seq_dic={} remap2seq(all_hashes_cnt_dic, all_hashes_seq_dic) #all_hashes_seq_dic has continuous key number #print "all_hashes_seq_dic=",all_hashes_seq_dic total_feature_numb=len(all_hashes_seq_dic) print "INFO: Total feature count=", len(all_hashes_seq_dic) # featured_rdd (list): [meta-data1,meta-data2,..., hash_cnt_dic, hash_str_dic] # seq_featured_rdd(list): [meta-data1,meta-data2,..., hash_cnthsh_dict, hash_str_dic] (feat id in sorted sequence) # hash_cnt_dic: {hash: count} hash_str_dic: {hash: 'str1,str2...' 
} # set binary_flag to True, all feature:value will be 1
broadcast_dic = sc.broadcast(all_hashes_seq_dic)
seq_featured_rdd = featured_rdd.map(lambda x: convert2seq(x, label_idx, data_idx, broadcast_dic.value, binary_flag=True)).cache()

# get hash_cnthsh_dict then flatMap and reduce to (feat id, count)
ct_rdd = seq_featured_rdd.flatMap(lambda x: [(i[0], i[1]) for i in x[data_idx].iteritems()]).reduceByKey(lambda a, b: a + b)
# sorted by feature id as int
feat_sample_count_arr = ct_rdd.sortBy(lambda x: int(x[0])).map(lambda x: x[1]).collect()
# sort after collect may fail when rdd is huge
#feat_sample_count_arr=[]
#for i in sorted(ct_rdd.collect(), key=lambda t: int(t[0])):
#    feat_sample_count_arr.append(i[1])
print "INFO: feat_sample_count_arr len=", len(feat_sample_count_arr)

# save feat_sample_count_arr data ==================================== ============
filter = '{"rid":' + row_id_str + ',"key":"feat_sample_count_arr"}'
upsert_flag = True
jo_insert = {}
jo_insert["rid"] = eval(row_id_str)
jo_insert["key"] = "feat_sample_count_arr"
jo_insert["value"] = feat_sample_count_arr
jstr_insert = json.dumps(jo_insert)
ret = query_mongo.upsert_doc_t(mongo_tuples, filter, jstr_insert, upsert_flag)
print "INFO: Upsert count for feat_sample_count_arr=", ret

# insert failed, save to local
if ret == 0:
    # drop old record in mongo
    ret = query_mongo.delete_many(mongo_tuples, None, filter)
    # make sure the per-row output directory exists (the pickle path below
    # includes the row_id_str subfolder)
    out_dir = os.path.join(local_out_dir, row_id_str)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    fsca_hs = os.path.join(out_dir, row_id_str + "_feat_sample_count_arr.pkl")
    print "WARNING: save feat_sample_count_arr to local"
    ml_util.ml_pickle_save(feat_sample_count_arr, fsca_hs)

# save feature data; TBD. not used. ==================================== ============
#libsvm_rdd=seq_featured_rdd.map(lambda x: convert2libsvm(x,label_idx,data_idx,label_arr))
# put hash to the front of each row, assume hash is after label
libsvm_rdd = seq_featured_rdd.map(lambda x: x[label_idx + 1] + " " + convert2libsvm(x, label_idx, data_idx, label_arr))

# debug only
#print "libsvm_rdd="
#for i in libsvm_rdd.collect():
#    print i

# get rdd statistics info
stats = featured_rdd.map(lambda p: len(p[metadata_count])).stats()
feat_count_max = stats.max()
feat_count_stdev = stats.stdev()
feat_count_mean = stats.mean()
sample_count = stats.count()
print "INFO: libsvm data: sample count=", sample_count, ",Feat count mean=", feat_count_mean, ",Stdev=", feat_count_stdev
print "INFO: ,max feature count=", feat_count_max

# find sample count by label
lbl_arr = featured_rdd.map(lambda x: (x[label_idx], 1)).reduceByKey(add).collect()
print "INFO: Sample count by label=", lbl_arr

# remove duplicated libsvm string; only keep the first duplicated item, assume space following key_idx
if remove_duplicated == "Y":
    libsvm_rdd = libsvm_rdd \
        .map(lambda x: (','.join(x.split(' ')[metadata_count:]), x)) \
        .groupByKey().map(lambda x: list(x[1])[0]) \
        .cache()
    cnt_list = libsvm_rdd.map(lambda x: (x.split(' ')[1], 1)).reduceByKey(add).collect()
    stats = libsvm_rdd.map(lambda x: len(x.split(' ')[metadata_count:])).stats()
    feat_count_max = stats.max()
    feat_count_stdev = stats.stdev()
    feat_count_mean = stats.mean()
    sample_count = stats.count()
    print "INFO: Non-Duplicated libsvm data: sample count=", sample_count, ",Feat count mean=", feat_count_mean, ",Stdev=", feat_count_stdev
    print "INFO: ,max feature count=", feat_count_max
    print "INFO: Non-Duplicated Label count list=", cnt_list

# save libsvm data ==================================== ============
libsvm_data_file = os.path.join(hdfs_folder, libsvm_alldata_filename)  #"libsvm_data"
print "INFO: libsvm_data_file=", libsvm_data_file
# define dnn_data_file before the clean-up below; it was previously referenced
# here while still undefined, raising a NameError that the bare except swallowed
dnn_data_file = os.path.join(hdfs_folder, dnn_alldata_filename)  #"dnn_data"
try:
    #hdfs.ls(save_dir)
    #print "find hdfs folder"
    hdfs.rmr(libsvm_data_file)
    if num_gram == 1:
        hdfs.rmr(dnn_data_file)
    #print "all files removed"
except IOError as e:
    print "WARNING: I/O error({0}): {1} at libsvm_data_file clean up".format(e.errno, e.strerror)
except:
    print "WARNING: Unexpected error at libsvm file clean up:", sys.exc_info()[0]

#codec = "org.apache.hadoop.io.compress.GzipCodec"
#libsvm_rdd.saveAsTextFile(libsvm_data_file, codec)
libsvm_rdd.saveAsTextFile(libsvm_data_file)  # TBD encrypted

feat_count_file = libsvm_data_file + "_feat_count"
print "INFO: feat_count_file=", feat_count_file
try:
    hdfs.rmr(feat_count_file)
except IOError as e:
    print "WARNING: I/O error({0}): {1} at feat_count clean up".format(e.errno, e.strerror)
except:
    print "WARNING: Unexpected error at libsvm feature count clean up:", sys.exc_info()[0]
sc.parallelize([total_feature_numb], 1).saveAsTextFile(feat_count_file)

label_dic = {}
# assign each label a number
for idx, label in enumerate(sorted(label_arr)):
    if not label in label_dic:
        label_dic[label] = idx  # starting from 0, value = idx, e.g., clean:0, dirty:1

# output text for DNN:[meta-data1,meta-data2,..., [feature tokens]] ================= DNN ===========
if num_gram == 1:  # special flag to tokenize and keep input orders
    print "INFO: processing data for DNN..."
    # create token dict
    # str_hash_dict: string to hash
    # all_hashes_seq_dic: hash to seq id
    if token_dict is None or len(token_dict) == 0:
        token_dict = {}
        str_hash_dict = {v: k for k, v in all_hash_str_dic.iteritems()}
        for k, v in str_hash_dict.iteritems():
            token_dict[k] = int(all_hashes_seq_dic[str(v)])
    #print "token_dict=",len(token_dict),token_dict
    dnn_rdd = tmp_rdd \
        .map(lambda x: tokenize_by_dict(x, data_idx, token_dict, label_idx, label_dic)) \
        .filter(lambda x: len(x) > metadata_count) \
        .filter(lambda x: type(x[metadata_count]) is list)  #.cache()
    # filter duplication here
    #print dnn_rdd.take(3)
    print "INFO: dnn_data_file=", dnn_data_file
    try:
        hdfs.rmr(dnn_data_file)
    except IOError as e:
        print "WARNING: I/O error({0}): {1} at dnn_data_file clean up".format(e.errno, e.strerror)
    except:
        print "WARNING: Unexpected error at dnn file clean up:", sys.exc_info()[0]
    try:
        dnn_rdd.saveAsTextFile(dnn_data_file)
    except:
        print "WARNING: Unexpected error at saving dnn data:", sys.exc_info()[0]
    try:
        stats = dnn_rdd.map(lambda p: len(p[metadata_count])).stats()
        feat_count_max = stats.max()
        feat_count_stdev = stats.stdev()
        feat_count_mean = stats.mean()
        sample_count = stats.count()
        print "INFO: DNN data: sample count=", sample_count, ",Feat count mean=", feat_count_mean, ",Stdev=", feat_count_stdev
        print "INFO: ,max feature count=", feat_count_max
    except:
        print "WARNING: Unexpected error at getting stats of dnn_rdd:", sys.exc_info()[0]

# clean up pca data in hdfs ============ ========================
pca_files = '*' + libsvm_alldata_filename + "_pca_*"
#print "INFO: pca_files=", pca_files
try:
    f_list = hdfs.ls(hdfs_folder)
    if len(f_list) > 0:
        df_list = fnmatch.filter(f_list, pca_files)
        for f in df_list:
            print "INFO: rm ", f
            hdfs.rmr(f)
except IOError as e:
    print "WARNING: I/O error({0}): {1}".format(e.errno, e.strerror)
except:
    print "WARNING: Unexpected error at libsvm pca file clean up:", sys.exc_info()[0]

# clean up pca data in web local ============ ========================
pca_fname = os.path.join(model_data_folder, row_id_str + '_pca_*.pkl*')
print "INFO: pca_fname=", pca_fname
try:
    for fl in glob.glob(pca_fname):
        print "INFO: remove ", fl
        os.remove(fl)
except OSError as e:
    # OSError exposes filename/strerror; e.pca_fname was not a real attribute
    print "Error: %s - %s." % (e.filename, e.strerror)
'''
Prior Audience Sample, to compare levels
0 = Weak
1 = Medium
2 = Strongest
'''

def _getSample():
    return random.randrange(MIN_VAL, MAX_VAL + 1)

prior = 1  # initialize the prior
jokesAndResponses = {}  # set of told jokes, and their response
jokesTold = 0
heuristics = Set()

'''
Main Function Here
'''

class heuristic(object):
    def __init__(self, t):
        self.type = t
        self.fails = 0
        self.prob = 100  # out of a hundred percent

    def getInfo(self):
        return [self.type, self.fails, self.prob]

    def getType(self):
def readGraph(file, n, p, mean, std_dev, PView, PShare, content_count):
    G1 = LoadEdgeList(PUNGraph, file, 0, 1)
    n = G1.GetNodes()
    CmtyVt = TCnComV()
    # Getting the Community
    CommunityCNM(G1, CmtyVt)
    nodes = TIntV()
    for N in G1.GetNI(0).GetOutEdges():
        nodes.Add(N)
    G1 = GetSubGraph(G1, nodes)
    # Drawing the original Community Graph
    #DrawGViz(G1, gvlDot, "graph1.png", "graph 1")
    Graph = {}
    for u in G1.Nodes():
        for v in u.GetOutEdges():
            if Graph.has_key(u.GetId()):
                Graph[u.GetId()].add(v)
            else:
                Graph.update({u.GetId(): Set([v])})
    # Initialize the probability vectors.
    for i in range(0, n):
        PView += [[0 for j in range(0, n)]]
        PShare += [[0 for j in range(0, n)]]
    # Populating the probability vectors.
    for v in G1.Nodes():
        for u in v.GetOutEdges():
            id_src = v.GetId()
            id_dst = u
            view_prob = np.random.binomial(n, p, 1)[0] / (n * 1.0)
            share_prob = np.random.binomial(n, p, 1)[0] / (n * 1.0)
            PView[id_dst][id_src] = view_prob
            PShare[id_dst][id_src] = share_prob
    # Content forest: each entry in this array is a forest for some content.
    content_forest = []
    # 4039 * 0.148
    # Number of content introduction points.
    content_intro_count = 60
    # Generating a forest for each content.
    for i in range(0, content_count):
        # Generating the random introduction points i.e. the users who introduce the content.
        random_sample = random.sample(range(0, n), content_intro_count)
        # Generating the forest for a content
        content_forest.append(BFS(Graph, random_sample, PView, PShare))
    new_Graph = {}
    weight = {}
    G2 = TNGraph.New()
    for i in range(0, n):
        G2.AddNode(i)
    # Generating the inferred graph
    for cf in content_forest:
        for e in cf.Edges():
            if new_Graph.has_key(e.GetSrcNId()) and e.GetDstNId() in new_Graph[e.GetSrcNId()]:
                weight[str(e.GetSrcNId()) + ',' + str(e.GetDstNId())] = weight[str(e.GetSrcNId()) + ',' + str(e.GetDstNId())] + 1
            elif new_Graph.has_key(e.GetSrcNId()):
                new_Graph[e.GetSrcNId()].add(e.GetDstNId())
                weight.update({str(e.GetSrcNId()) + ',' + str(e.GetDstNId()): 1})
                G2.AddEdge(e.GetSrcNId(), e.GetDstNId())
            else:
                new_Graph.update({e.GetSrcNId(): Set([e.GetDstNId()])})
                weight.update({str(e.GetSrcNId()) + ',' + str(e.GetDstNId()): 1})
                G2.AddEdge(e.GetSrcNId(), e.GetDstNId())
    # Sum of weights of all the neighbours of a vertex
    TWeight = {}
    for u in new_Graph.keys():
        total = 0
        for v in new_Graph[u]:
            total += weight[str(u) + ',' + str(v)]
        TWeight.update({u: total})
    # Calculating edge confidence
    for u in new_Graph.keys():
        for v in new_Graph[u]:
            weight[str(u) + ',' + str(v)] = weight[str(u) + ',' + str(v)] / (TWeight[u] * 1.0)
    # Calculating conf_threshold
    conf_threshold = mean + 0 * std_dev
    # Generating the graph whose edges have conf_value greater than conf_threshold
    G3 = TUNGraph.New()
    nodes = []
    for u in new_Graph.keys():
        for v in new_Graph[u]:
            if weight[str(u) + ',' + str(v)] >= conf_threshold:
                if u not in nodes:
                    G3.AddNode(u)
                    nodes += [u]
                if v not in nodes:
                    G3.AddNode(v)
                    nodes += [v]
                G3.AddEdge(u, v)
    # Drawing the inferred Graph
    DrawGViz(G3, gvlDot, "graph3.png", "graph 2")
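# Hedged sketch of the edge-confidence step above in plain Python, without SNAP:
# each edge weight (co-occurrence count) is divided by the total weight out of
# its source vertex, so the confidences leaving a vertex sum to 1. Toy data only.
def edge_confidence(weight):
    """weight: dict mapping 'u,v' -> count; returns dict of normalized confidences."""
    totals = {}
    for key, w in weight.items():
        u = key.split(',')[0]
        totals[u] = totals.get(u, 0) + w
    return {key: w / (totals[key.split(',')[0]] * 1.0)
            for key, w in weight.items()}

# e.g. edge_confidence({'0,1': 3, '0,2': 1}) -> {'0,1': 0.75, '0,2': 0.25}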
def __init__(self, h):
    self.allH = Set()
    for elm in h:
        self.allH.add(heuristic(elm))
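# Hypothetical usage of the constructor above (the enclosing class is not shown;
# it appears to be a container seeding one heuristic object per strategy name):
#   container = HeuristicContainer(['pun', 'sarcasm', 'observational'])
#   for h in container.allH:
#       print(h.getInfo())   # e.g. ['pun', 0, 100]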
class ExternalProgramTestSuite: """ A Class for creating Test Suites with test cases which call external programs """ # internal static variables _test_suites = {} _num_formatting_chars = 100 _all_log_files = Set() _has_run = False _framework_output_file = None # public static variables color_output_text = True suite_header_color = Fore.MAGENTA case_header_color = Fore.CYAN suite_result_header_color = Fore.YELLOW def __init__(self, **kwargs): # reset the suite variables self._set_suite_defaults() # test suite name is the name of the suite class # or the suite_name arg if passed if 'suite_name' in kwargs: if assert_variable_type(kwargs['suite_name'], str): self.suite_name = kwargs['suite_name'] else: # suite name defaults to class name self.suite_name = self.__class__.__name__ # add test suite to class static total list try: # make sure a suite with the same # name does not already exist if self.suite_name not in ExternalProgramTestSuite._test_suites: ExternalProgramTestSuite._test_suites[self.suite_name] = { 'self': self, 'name': self.suite_name, 'description': self.suite_description, 'args': kwargs, 'num_passed': 0, 'num_tests': 0, 'num_checks': 0, 'num_checks_passed': 0, 'execution_time': 0, 'has_run': False, 'pass_threshold': 100, 'passed': False } else: raise ValueError( 'A suite with the name "%s" already exists. ' 'Please rename one of suite classes or pass a unique "suite_name" argument to one or both of the constructors.' ) except ValueError as e: raise Exception('[%s] %s' % (type(e).__name__, e)) def log(self, print_string, error=False, color=Fore.RESET): """Wrapper over print function to allow writing test framework output to file if desired. """ # write the print output to the log files if self.log_framework_output: if error and self.stderr_file is not None: with open(self.stderr_file, 'a') as f: f.write(print_string + "\r\n") elif self.stdout_file is not None: with open(self.stdout_file, 'a') as f: f.write(print_string + "\r\n") # print the output and color appropriately if ExternalProgramTestSuite.color_output_text: print(color + print_string + Fore.RESET + Back.RESET + Style.RESET_ALL) # Aptana's interactive console doesn't accept ANSI escape # characters but at least it colors the stderr red so # separate normal output from error output appropriately else: if error: sys.stderr.write(print_string + "\r\n") sys.stderr.flush() else: sys.stdout.write(print_string + "\r\n") sys.stdout.flush() def _set_suite_defaults(self): """Set the suite variables to their defaults """ # set the default suite variables # default suite name and description self.suite_name = None self.suite_description = None # number of passed test cases self._num_tests_passed = 0 # num checks and failures self._total_checks_passed = 0 self._total_checks = 0 # threshold in percentage of tests # passed to decide status of suite self.suite_pass_threshold = 100 # whether to truncate the log file # before writing to it self.overwrite_log_file = True # whether to print process output # or just write it to the log file self.print_process_output = True self.log_framework_output = False # default log path self._default_log_file = "run.log" self.stdout_file = self.stderr_file = self._default_log_file # setup and teardown function self._suite_setup = None self._suite_teardown = None # timelimit values self._suite_timelimit_met = True self.suite_timelimit = None self.suite_case_timelimit = None # invalid args list self._invalid_args = [] def _set_case_defaults(self): """ Set the case variables to their defaults """ # 
default test case variables self._name = None # whether to print process output # or just write it to the log file self.print_case_output = self.print_process_output # default log path self.stdout_file = self.stderr_file = self._default_log_file # default case description self._description = None # num checks and failures self._num_checks_passed = 0 self._num_checks = 0 # threshold in percentage of checks # passed to decide status of case self.case_pass_threshold = 100 # test case time limit self._timelimit = self.suite_case_timelimit # wait to print case header self._wait_sem = 0 # fixture, setup, teardown self._fixture = None self._case_setup = None self._case_teardown = None def _setup_suite(self, **kwargs): """ Set the suite variables """ # if a test suite requires common variables across all test cases, # they can be passed through kwargs and are set here for key, value in kwargs.items(): if type(value) is str: exec('self.' + str(key) + '="' + value + '"') in globals(), locals() else: exec('self.' + str(key) + '=' + str(value)) in globals(), locals() # each function in a test suite class is a test case # so get the cases and add them to the testSuites list test_names = { key: value for key, value in self.__class__.__dict__.items() if isinstance(value, FunctionType) } self.test_cases = [] for name in test_names: if name == "setup": self._suite_setup = getattr(self, name) elif name == "teardown": self._suite_teardown = getattr(self, name) elif 'fixture' not in name.lower(): self.test_cases.append(name) def _setup_case(self): # if a suite has startted running and the overwrite log file # flag was set to True, truncate the log files if self.overwrite_log_file and ( not ExternalProgramTestSuite._has_run or len([ (x) for x in [self.stdout_file, self.stderr_file] if x not in ExternalProgramTestSuite._all_log_files ]) > 0): for log_file in [self.stdout_file, self.stderr_file]: ExternalProgramTestSuite._all_log_files.add(log_file) with open(log_file, 'w') as f: f.truncate(0) def _end_case(self): # call fixture teardown if set if self._case_teardown is not None: if isinstance(self._case_teardown, MethodType): self._case_teardown(self) else: self._case_teardown() def _validate_argument(self, argument, types): key = argument.keys()[0] valid, message = assert_variable_type(argument[key], types, False) if not valid: self._invalid_args.append("%s: %s" % (key, message)) def _validate_suite_arguments(self): """ Validate test suite argument types """ # reset invalid args list self._invalid_args = [] #string string_vars = [{ "suite_description": self.suite_description }, { "stdout_file": self.stdout_file }, { "stderr_file": self.stderr_file }] [self._validate_argument(x, [str, NoneType]) for x in string_vars] # bool bool_vars = [{ "overwrite_log_file": self.overwrite_log_file }, { "print_process_output": self.print_process_output }, { "log_framework_output": self.log_framework_output }] [self._validate_argument(x, bool) for x in bool_vars] # float float_vars = [{ "suite_timelimit": self.suite_timelimit }, { "suite_case_timelimit": self.suite_case_timelimit }] [ self._validate_argument(x, [int, float, NoneType]) for x in float_vars ] # functions function_vars = [{ "suite setup": self._suite_setup }, { "suite teardown": self._suite_teardown }] [ self._validate_argument(x, [MethodType, NoneType]) for x in function_vars ] # raise exception if any invalid args if len(self._invalid_args) > 0: raise InvalidArgument(('\r\n').join(self._invalid_args)) def _validate_test_arguments(self): """ Validate test case 
argument types """ # reset invalid args list self._invalid_args = [] #string string_vars = [{ 'description': self._description }, { 'name': self._name }] [self._validate_argument(x, [str, NoneType]) for x in string_vars] # float float_vars = [{'timelimit': self._timelimit}] [ self._validate_argument(x, [int, float, NoneType]) for x in float_vars ] # fixture try: if self._fixture is not None: self._case_setup, self._case_teardown = self._fixture() except Exception: self._invalid_args.append( 'a proper fixture returning a setup and teardown function was not provided' ) # functions (fixture override) function_vars = [{ 'case setup': self._case_setup }, { 'case teardown': self._case_teardown }] [ self._validate_argument(x, [FunctionType, MethodType, NoneType]) for x in function_vars ] # raise exception if any invalid args if len(self._invalid_args) > 0: raise InvalidArgument(('\r\n').join(self._invalid_args)) def run(self, suite_name=None): """ Run the test suite """ # capture start time suite_start_time = timeit.default_timer() # setup suite if suite_name is None: suite_name = self.suite_name self._setup_suite( **ExternalProgramTestSuite._test_suites[suite_name]['args']) # validate suite args try: self._validate_suite_arguments() except Exception as e: ExternalProgramTestSuite._test_suites[ self.suite_name]['has_run'] = True raise SuiteError('Error in test suite "%s" [%s] %s' % (suite_name, type(e).__name__, e)) # run all the test cases for index, case in enumerate(sorted(self.test_cases)): self.test_case = getattr(self, case) if not self.test_case: raise Exception("Test Case %s does not exist" % str(self.test_case)) # reset the default suite/case variables self._set_case_defaults() # set test case name to case self._name = case # suite setup routine self._setup_case() # print test suite name and descripion if any # and if first loop through cases if index == 0: self.log("=" * ExternalProgramTestSuite._num_formatting_chars) self.log("TEST SUITE: %s" % suite_name, False, ExternalProgramTestSuite.suite_header_color) if self.suite_description: self.log("Description: %s" % (self.suite_description)) ExternalProgramTestSuite._test_suites[suite_name][ 'description'] = self.suite_description # call suite setup function if set if self._suite_setup is not None: self._suite_setup() # run the test case try: self._run_test_case() except Exception as e: self.log('[%s] %s' % (type(e).__name__, e), True, Fore.RED) # set has_run flags ExternalProgramTestSuite._has_run = True # set suite attributes for static _test_suites list ExternalProgramTestSuite._test_suites[ self.suite_name]['has_run'] = True ExternalProgramTestSuite._test_suites[ self.suite_name]['pass_threshold'] = self.suite_pass_threshold # end case routine self._end_case() # capture suite end time suite_end_time = timeit.default_timer() suite_time_taken = suite_end_time - suite_start_time ExternalProgramTestSuite._test_suites[ self.suite_name]['execution_time'] = suite_time_taken # if a timelimit was set # check if it was met if self.suite_timelimit is not None: self.log("_" * ExternalProgramTestSuite._num_formatting_chars) if suite_time_taken <= self.suite_timelimit: self.log( 'CHECK PASS: suite completed before time limit of %.4f' % self.suite_timelimit, False, Back.GREEN) self._total_checks_passed += 1 else: self.log( 'CHECK FAIL: suite did not complete before time limit of %.4f' % self.suite_timelimit, True, Back.RED) self._suite_timelimit_met = False self._total_checks += 1 # call suite teardown function if set if self._suite_teardown is not 
None: self._suite_teardown() # print test result self._print_suite_results() def case_header(self): """ Test case header output """ # print case name self.log("-" * ExternalProgramTestSuite._num_formatting_chars) self.log("CASE: %s" % self._name, False, ExternalProgramTestSuite.case_header_color) # print description if any if self._description is not None: self.log("Description: %s" % (str(self._description))) self.log("-" * ExternalProgramTestSuite._num_formatting_chars) # validate args try: self._validate_test_arguments() except Exception as e: self.log('[%s] %s' % (type(e).__name__, e), True, Fore.RED) # call fixture setup if set if self._case_setup is not None: if isinstance(self._case_setup, MethodType): self._case_setup(self) else: self._case_setup() def _run_test_case(self): """ Run an individual test case """ # read source file to see decorators and # call case_header at the right time test_function = self._name suite_class = str(self.__class__).rpartition('.')[2] lines = [] save_lines = False with open(inspect.getmodule(self.__class__).__file__) as f: for line in f: if suite_class in line: save_lines = True if save_lines and test_function in line: break if save_lines and 'def ' in line: lines = [] if (save_lines and len(line.strip()) > 0 and line.strip()[0] == "@"): lines.append(line.strip().rpartition('(')[0]) # semaphore to wait for calling # case_header after all decorators self._wait_sem = len(lines) # if semaphor is 0 print case header immediately if self._wait_sem == 0: self.case_header() # run test case execution_time = timeit.timeit(self.test_case, number=1) # if a timelimit was set # check if it was met if self._timelimit is not None: if execution_time <= self._timelimit: self.log( 'CHECK PASS: test completed before time limit of %.4f' % self._timelimit, False, Back.GREEN) self._num_checks_passed += 1 else: self.log( 'CHECK FAIL: test did not complete before time limit of %.4f' % self._timelimit, True, Back.RED) self._num_checks += 1 # print pass/fail, execution time if self._num_checks > 0: percentage_passed = (self._num_checks_passed * 1.0 / self._num_checks) * 100 else: percentage_passed = 0 output_string = ("%d/%d (%.2f%%) CHECKS in %.4f seconds" % (self._num_checks_passed, self._num_checks, percentage_passed, execution_time)) if percentage_passed >= self.case_pass_threshold or self._num_checks == 0: output_string += " TEST PASS" if self.case_pass_threshold != 100: output_string += " with %.2f%% threshold" % self.case_pass_threshold self.log(output_string, False, Back.GREEN) self._num_tests_passed += 1 else: output_string += " TEST FAIL" self.log(output_string, False, Back.RED) self._total_checks += self._num_checks self._total_checks_passed += self._num_checks_passed def _print_suite_results(self): self.log("*" * ExternalProgramTestSuite._num_formatting_chars) self.log("SUITE RESULT", False, ExternalProgramTestSuite.suite_result_header_color) self.log("*" * ExternalProgramTestSuite._num_formatting_chars) passed = self._print_info_and_status() self.log("=" * ExternalProgramTestSuite._num_formatting_chars) # add test result to class static suite list ExternalProgramTestSuite._test_suites[ self.suite_name]['num_tests'] = len(self.test_cases) ExternalProgramTestSuite._test_suites[ self.suite_name]['num_passed'] = self._num_tests_passed ExternalProgramTestSuite._test_suites[ self.suite_name]['passed'] = passed ExternalProgramTestSuite._test_suites[ self.suite_name]['num_checks'] = self._total_checks ExternalProgramTestSuite._test_suites[ 
self.suite_name]['num_checks_passed'] = self._total_checks_passed def _print_info_and_status(self, suite_name=""): num_tests = len(self.test_cases) passed = False try: if num_tests > 0: percentage_tests_passed = (self._num_tests_passed * 1.0 / num_tests) * 100 else: percentage_tests_passed = 0 if self._total_checks > 0: percentage_checks_passed = (self._total_checks_passed * 1.0 / self._total_checks) * 100 else: percentage_checks_passed = 0 output_string = ( "%s%d/%d (%.2f%%) TESTS with %d/%d (%.2f%%) CHECKS in %.4f seconds" % (suite_name, self._num_tests_passed, num_tests, percentage_tests_passed, self._total_checks_passed, self._total_checks, percentage_checks_passed, ExternalProgramTestSuite._test_suites[ self.suite_name]['execution_time'])) if percentage_tests_passed >= self.suite_pass_threshold and self._suite_timelimit_met: output_string += " OK" if self.suite_pass_threshold != 100: output_string += " with %.2f%% threshold" % self.suite_pass_threshold self.log(output_string, False, Back.GREEN) passed = True else: output_string += " NOT OK" self.log(output_string, False, Back.RED) except Exception as e: self.log('[%s] %s' % (type(e).__name__, e), True, Fore.RED) return passed def check_subprocess(self, executable_command, command_arguments, expected_returncode, timeout=None, print_process_output=True, stdout_file=None, stderr_file=None, poll_seconds=.100): process = None try: process, execution_time = run_subprocess( executable_command, command_arguments, timeout, print_process_output, stdout_file, stderr_file, poll_seconds) except OSError as e: self.log('[%s] %s' % (type(e).__name__, e), True, Fore.RED) except ValueError as e: self.log('[%s] %s' % (type(e).__name__, e), True, Fore.RED) except TimeoutError as e: self.log('[%s] %s' % (type(e).__name__, e), True, Fore.RED) # print pass/fail, execution time if process is not None: if process.returncode == expected_returncode: self.log('CHECK PASS', False, Back.GREEN) self._num_checks_passed += 1 else: self.log('CHECK FAIL', True, Back.RED) self.log("%.4f seconds" % (execution_time)) else: self.log('CHECK FAIL', True, Back.RED) self._num_checks += 1 @staticmethod def run_all(): """ Run all registered test suites that have run """ ExternalProgramTestSuite._has_run = False for suite, properties in ExternalProgramTestSuite._test_suites.items(): try: ExternalProgramTestSuite.run(properties['self'], properties['name']) except Exception as e: properties['self'].log('[%s] %s' % (type(e).__name__, e), True, Fore.RED) # print test result properties['self']._print_suite_results() ExternalProgramTestSuite.print_total_results() @staticmethod def print_total_results(): """ Print the cumulative results from all suites registered and run """ # print results for each suite on one line # keep track of test results info for totals total_num_tests = 0 total_num_passed = 0 total_checks = 0 total_checks_passed = 0 total_suites_passed = 0 total_num_suites = 0 total_execution_time = 0 try: for index, (suite, results) in enumerate( ExternalProgramTestSuite._test_suites.items()): self = results['self'] if index == 0: self.log("*" * ExternalProgramTestSuite._num_formatting_chars) self.log( "ALL SUITE RESULTS", False, ExternalProgramTestSuite.suite_result_header_color) self.log("*" * ExternalProgramTestSuite._num_formatting_chars) if results['has_run']: self._print_info_and_status(suite + ": ") total_num_tests += results['num_tests'] total_num_passed += results['num_passed'] if total_num_tests > 0 and results['passed']: total_suites_passed += 1 total_checks += 
results['num_checks'] total_checks_passed += results['num_checks_passed'] total_execution_time += results['execution_time'] self.log("_" * ExternalProgramTestSuite._num_formatting_chars) total_num_suites += 1 # print cumulative total pass/fail if total_num_tests > 0: if total_checks > 0: percentage_checks_passed = (total_checks_passed * 1.0 / total_checks) * 100 else: percentage_checks_passed = 0 self.log("TOTALS") self.log("." * ExternalProgramTestSuite._num_formatting_chars) percentage_passed = (total_suites_passed * 1.0 / total_num_suites) * 100 self.log( "%d/%d (%.2f%%) SUITES\n%d/%d (%.2f%%) TESTS\n%d/%d (%.2f%%) CHECKS\nin %.4f seconds" % (total_suites_passed, total_num_suites, percentage_passed, total_num_passed, total_num_tests, (total_num_passed * 1.0 / total_num_tests) * 100, total_checks_passed, total_checks, percentage_checks_passed, total_execution_time)) if percentage_passed == 100: self.log("OK", False, Back.GREEN) else: self.log("NOT OK", False, Back.RED) self.log("." * ExternalProgramTestSuite._num_formatting_chars) except Exception as e: print(Fore.RED + '[%s] %s' % (type(e).__name__, e) + Fore.RESET + Back.RESET + Style.RESET_ALL)
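# Hedged usage sketch for the framework above: every method of a subclass becomes
# a test case, and check_subprocess() asserts on an external program's exit code.
# The command and suite name here are invented for illustration.
class EchoSuite(ExternalProgramTestSuite):
    def test_echo_exits_cleanly(self):
        self.check_subprocess('echo', ['hello'], expected_returncode=0)

# EchoSuite(suite_name='echo suite')        # registers the suite
# ExternalProgramTestSuite.run_all()        # runs every registered suite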
def diffList(left, right, path, result):
    for x in range(len(left)):
        path2 = path + '[' + str(x) + ']'
        if x >= len(right):
            result['missingOnRight'].append(path2)
        else:
            diffValue(left[x], right[x], path2, result)
    for x in range(len(left), len(right)):
        path2 = path + '[' + str(x) + ']'
        result['missingOnLeft'].append(path2)

# ---------------------------------------------------------------------------------

ALLOWED_MISSING_ON_RIGHT = Set([".version", ".policyType", ".guid"])

def isPolicyIdentical(old, new):
    result = digdiff(old, new)
    #misc.ppprint(old)
    #misc.ppprint(new)
    debug("missingOnLeft:{}".format(result['missingOnLeft']))
    debug("missingOnRight:{}".format(result['missingOnRight']))
    debug("differsByType:{}".format(result['differsByType']))
    debug("differsByValue:{}".format(result['differsByValue']))
    if len(result['missingOnLeft']) > 0 or len(result['differsByType']) > 0 or len(result['differsByValue']) > 0:
        return False
    else:
        for missing in result["missingOnRight"]:
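# The loop above is cut off; given ALLOWED_MISSING_ON_RIGHT, it presumably ends by
# rejecting any right-side omission whose path suffix is not whitelisted. A
# plausible completion (an assumption, not the original code):
#           if not any(missing.endswith(allowed) for allowed in ALLOWED_MISSING_ON_RIGHT):
#               return False
#       return True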
from collections import defaultdict
from sets import Set

pos_seedlist = ["good", "nice", "love", "excellent", "fortunate", "correct", "superior"]
neg_seedlist = ["bad", "nasty", "poor", "hate", "unfortunate", "wrong", "inferior"]

sentences = open('/home/twinkle/NLP/hw3/tweets.txt').read().strip().split("\n")  # one row

# condition to ignore
wordcounts = defaultdict(int)
pair_counts = defaultdict(int)
seedsum = defaultdict(float)
pmi = defaultdict(float)
polarity = defaultdict(float)
total = 0
bow = Set()
words = []
allwords = []

# make a set, wordsum, break loop for both, i=0
i = 0
#bla=[]
for sent in sentences:
    print i
    i += 1
    temp = sent.split(' ')
    words.append(list(Set(temp)))

for row in words:
def filter_emojis(text):
    return Set(text.replace(':', '')).issubset(allowed_chars) and len(text) > 2
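# Example of the check above, with an assumed allowed_chars set (the real one is
# defined elsewhere): an emoji shortcode like ':smile:' passes, free text fails.
#   allowed_chars = Set('abcdefghijklmnopqrstuvwxyz_0123456789')
#   filter_emojis(':smile:')      # True
#   filter_emojis(':two words:')  # False (space not in allowed_chars)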
import sys
from sets import Set
import numpy

fdata = open(sys.argv[1])
fcluster = open(sys.argv[2])
fout = open(sys.argv[3], "w")
output_type = int(sys.argv[4])

# map user id -> cluster id
uid_cid = {}
for line in fcluster:
    uid = int(line.split(" ")[0])
    cid = int(line.split(" ")[1])
    uid_cid[uid] = cid

doc_set = Set()
cluster_ctr = {}
for line in fdata:
    line = line[:-1]
    line_arr = line.split("|")
    user_id = int(line_arr[1])
    if user_id not in uid_cid:
        continue
    cluster_id = uid_cid[user_id]
    shown_doc = line_arr[0].split(" ")[1]
    clicked = int(line_arr[0].split(" ")[2])
    doc_set.add(shown_doc)
    if cluster_id not in cluster_ctr:
        cluster_ctr[cluster_id] = {}
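# Hedged sketch of where the truncated loop above is likely headed: accumulate
# per-cluster, per-document (shows, clicks) counts so a CTR can be derived.
# The nested layout is an assumption based on the parsing code.
#   if shown_doc not in cluster_ctr[cluster_id]:
#       cluster_ctr[cluster_id][shown_doc] = [0, 0]   # [shows, clicks]
#   cluster_ctr[cluster_id][shown_doc][0] += 1
#   cluster_ctr[cluster_id][shown_doc][1] += clicked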
class AggDC:
    PassThrough = Set('''
        BeginDrawing EndDrawing
        GetBackground GetSize GetSizeTuple
        SetBrush SetPen
        '''.split())

    def __init__(self, dc):
        self.dc = dc
        self.dc.BeginDrawing()
        w, h = self.dc.GetSizeTuple()
        self.draw = aggdraw.Draw('RGB', (w, h))
        self.draw.rectangle((0, 0, w, h), None, aggBrush(dc.GetBackground()))

    def __del__(self):
        w, h = self.dc.GetSizeTuple()
        if w and h:
            image = wx.EmptyImage(w, h)
            image.SetData(self.draw.tostring())
            self.dc.DrawBitmap(image.ConvertToBitmap(), 0, 0)
        self.dc.EndDrawing()

    def __getattr__(self, attr):
        if attr in self.PassThrough:
            return getattr(self.dc, attr)
        else:
            raise AttributeError("%s instance has no attribute '%s'" % (self.__class__.__name__, attr))

    def CrossHair(self, x, y):
        #self.dc.CrossHair(x, y)
        w, h = self.dc.GetSizeTuple()
        p = aggPen(self.dc.GetPen())
        self.draw.line((0, y, w, y), p)
        self.draw.line((x, 0, x, h), p)

    def DrawArc(self, x1, y1, x2, y2, xc, yc):
        #self.dc.DrawArc(x1, y1, x2, y2, xc, yc)
        b = aggBrush(self.dc.GetBrush())
        p = aggPen(self.dc.GetPen())
        radius = ((xc-x1)**2 + (yc-y1)**2)**0.5
        self.draw.pieslice(
            (xc-radius, yc-radius, xc+radius, yc+radius),
            math.degrees(math.atan2(yc-y1, x1-xc)),
            math.degrees(math.atan2(yc-y2, x2-xc)),
            p, b
        )

    def DrawCircle(self, x, y, radius):
        #self.dc.DrawCircle(x, y, radius)
        b = aggBrush(self.dc.GetBrush())
        p = aggPen(self.dc.GetPen())
        self.draw.ellipse((x-radius, y-radius, x+radius, y+radius), p, b)

    def DrawEllipse(self, x, y, width, height):
        #self.dc.DrawEllipse(x, y, width, height)
        b = aggBrush(self.dc.GetBrush())
        p = aggPen(self.dc.GetPen())
        self.draw.ellipse((x, y, x+width, y+height), p, b)

    def DrawLine(self, x1, y1, x2, y2):
        #self.dc.DrawLine(x1, y1, x2, y2)
        p = aggPen(self.dc.GetPen())
        self.draw.line((x1, y1, x2, y2), p)

    def DrawRectangle(self, x, y, width, height):
        #self.dc.DrawRectangle(x, y, width, height)
        b = aggBrush(self.dc.GetBrush())
        p = aggPen(self.dc.GetPen())
        self.draw.rectangle((x, y, x+width, y+height), p, b)

    def DrawPolygon(self, points):
        #self.dc.DrawPolygon(points)
        n = []
        for p in points:
            n += p
        b = aggBrush(self.dc.GetBrush())
        p = aggPen(self.dc.GetPen())
        self.draw.polygon(n, p, b)
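# Hypothetical usage of the adapter above inside a wxPython paint handler: it
# proxies whitelisted calls to the wrapped DC and renders everything else through
# aggdraw, blitting the buffer back when the wrapper is garbage-collected.
#   def OnPaint(self, event):
#       dc = AggDC(wx.PaintDC(self))
#       dc.DrawCircle(50, 50, 20)
#       dc.DrawLine(0, 0, 100, 100)
#       del dc  # triggers __del__, which copies the aggdraw buffer onto the DC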
def find_certain_child(certain, uncertain, possibles):
    li = []
    for name in Set(certain):
        min_ = minCount(possibles, name) - uncertain.count(name)
        li.extend(min_ * [name])
    return li
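# Worked example for the function above, assuming the helper minCount(possibles,
# name) returns the minimum number of times `name` appears across the candidate
# lists (minCount is not shown in this excerpt):
#   certain   = ['a', 'a', 'b']
#   uncertain = ['a']
#   possibles = [['a', 'a', 'b'], ['a', 'a', 'a', 'b']]
#   # minCount -> 'a': 2, 'b': 1; subtracting uncertain counts leaves one 'a', one 'b'
#   find_certain_child(certain, uncertain, possibles)  # ['a', 'b'] (set order may vary)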
def run(self): """ 2008-05-08 transpose everything if output_matrix_type=1 (bjarni's SNP matrix format) 2007-02-19 --db_connect --get_snp_id2index() --get_strain_id2index() --get_strain_id_info() --get_snp_id_info() --get_data_matrix() if self.toss_out_rows: --toss_rows_to_make_distance_matrix_NA_free() --find_smallest_vertex_set_to_remove_all_edges() --write_data_matrix() #--sort_file() 2007-09-22 for mysql_connection add get_nativename_snpid2call_m() add fill_in_resolved_duplicated_calls() """ if self.debug: import pdb pdb.set_trace() if self.db_connection_type == 1: import MySQLdb #conn = MySQLdb.connect(db="stock",host='natural.uchicago.edu', user='******', passwd='iamhereatusc') conn = MySQLdb.connect(db=self.dbname, host=self.hostname, user=self.user, passwd=self.passwd) curs = conn.cursor() snp_id2index, snp_id_list, snp_id2info = self.get_snp_id2index_m( curs, self.input_table, self.snp_locus_table) strain_id2index, strain_id_list, nativename2strain_id, strain_id2acc, strain_id2category = self.get_strain_id2index_m(curs, \ self.input_table, self.strain_info_table, self.only_include_strains_with_GPS, \ self.resolve_duplicated_calls, toss_contaminants=self.toss_contaminants) #strain_id2acc, strain_id2category = self.get_strain_id_info_m(curs, strain_id_list, self.strain_info_table) #snp_id2info = self.get_snp_id_info_m(curs, snp_id_list, self.snp_locus_table) if self.input_table == 'dbsnp.calls': from variation.src.FigureOut384IlluminaABMapping import get_snps_id2mapping snps_id2mapping = get_snps_id2mapping(self.hostname, dbname='dbsnp', user=self.user, passwd=self.passwd) else: snps_id2mapping = None data_matrix = self.get_data_matrix_m(curs, strain_id2index, snp_id2index, nt2number, self.input_table, self.need_heterozygous_call, snps_id2mapping) """ if self.resolve_duplicated_calls: nativename_snpid2call = self.get_nativename_snpid2call_m(curs, self.strain_info_table, self.input_table) data_matrix = self.fill_in_resolved_duplicated_calls(data_matrix, strain_id2index, snp_id2index, nativename2strain_id, nativename_snpid2call) """ if self.include_other_strain_info: strain_id2other_info = self.get_strain_id2other_info( curs, strain_id_list, self.strain_info_table, self.input_table) else: strain_id2other_info = {} elif self.db_connection_type == 2: (conn, curs) = db_connect(self.hostname, self.dbname, self.schema) snp_id2index, snp_id_list = self.get_snp_id2index( curs, self.input_table, self.snp_locus_table) strain_id2index, strain_id_list = self.get_strain_id2index( curs, self.input_table) strain_id2acc, strain_id2category = self.get_strain_id_info( curs, strain_id_list, self.strain_info_table) snp_id2info = self.get_snp_id_info(curs, snp_id_list, self.snp_locus_table) data_matrix = self.get_data_matrix(curs, strain_id2index, snp_id2index, nt2number, self.input_table, self.need_heterozygous_call) strain_id2other_info = {} if self.toss_out_rows: rows_to_be_tossed_out = self.toss_rows_to_make_distance_matrix_NA_free( data_matrix) rows_to_be_tossed_out = Set(rows_to_be_tossed_out) else: rows_to_be_tossed_out = Set() #05/08/08 if self.discard_all_NA_strain: from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix remove_rows_data = FilterStrainSNPMatrix.remove_rows_with_too_many_NAs( data_matrix, row_cutoff=1) rows_with_too_many_NAs_set = remove_rows_data.rows_with_too_many_NAs_set #row_index2no_of_NAs = remove_rows_data.row_index2no_of_NAs rows_to_be_tossed_out.update(rows_with_too_many_NAs_set) strain_acc_list = [ strain_id2acc[strain_id] for strain_id in strain_id_list ] 
category_list = [ strain_id2category[strain_id] for strain_id in strain_id_list ] strain_acc2other_info = {} for strain_id in strain_id2other_info: strain_acc2other_info[ strain_id2acc[strain_id]] = strain_id2other_info[strain_id] if self.output_matrix_type == 1: #transpose everything data_matrix = num.array(data_matrix) data_matrix = num.transpose(data_matrix) header = ['Chromosomes', 'Positions'] + strain_acc_list chromosome_ls = [] position_ls = [] for snp_id in snp_id_list: snp_name, chromosome, position = snp_id2info[snp_id] chromosome_ls.append(chromosome) position_ls.append(position) strain_acc_list = chromosome_ls category_list = position_ls cols_to_be_tossed_out = rows_to_be_tossed_out rows_to_be_tossed_out = None strain_id2other_info = None #make up one else: header = ['strain', 'category'] for snp_id in snp_id_list: snp_name, chromosome, position = snp_id2info[snp_id] header.append(snp_name) cols_to_be_tossed_out = None write_data_matrix(data_matrix, self.output_fname, header, strain_acc_list, category_list, rows_to_be_tossed_out=rows_to_be_tossed_out, \ cols_to_be_tossed_out=cols_to_be_tossed_out, nt_alphabet=self.nt_alphabet,\ strain_acc2other_info=strain_acc2other_info, delimiter=self.delimiter)
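# Small numpy illustration of the output_matrix_type == 1 transpose above: a
# strains x SNPs matrix becomes SNPs x strains, so each output row is one SNP
# with its chromosome/position as the leading columns.
import numpy as num

m = num.array([[1, 2, 3],    # strain 1 calls at SNPs A, B, C
               [4, 5, 6]])   # strain 2
num.transpose(m)             # rows are now SNPs: [[1, 4], [2, 5], [3, 6]]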
def process_line(task_str, task_data_str): # Ignore any responses that make it into the task string task_str = mass_str_replace(task_str, response_strs, '') # Process task_data_str into component bits # For all tasks except learning task, extract spaun's answer if task_str in ['A0', 'A1', 'A3', 'A4', 'A5', 'A6', 'A7']: # Split the task data string into before and after the question mark task_data_split = task_data_str.split('?', 1) # The task information is before the question mark task_info = task_data_split[0].replace("'", '') # Filter out the MNIST digits task_info = remove_MNIST_strs(task_info) # Record special characters has_F = 'F' in task_info has_R = 'R' in task_info has_P = 'P' in task_info has_K = 'K' in task_info # Split up the different components of the task info task_info_split = task_info.split(']') if task_info_split[-1] == '': task_info_split = task_info_split[:-1] # Remove [ ]'s and special characters from each part of task_info_split for i in range(len(task_info_split)): task_info_split[i] = \ mass_str_replace(task_info_split[i], ['[', ']', 'F', 'R', 'P', 'K', '-'], '') # Spaun's answer is after the question mark task_answer_spaun = \ np.array(list(mass_str_replace(task_data_split[1], response_strs, num_list_strs))) if len(task_answer_spaun) == 0: return (None, None) # ------ Reference answer generation ------ if task_str in ['A0', 'A1', 'A3']: # For copy-draw, classification, memory task task_info = np.array(list(task_info_split[0])) if has_R: task_answer_ref = task_info[-1::-1] else: task_answer_ref = task_info elif task_str == 'A4': # For counting tasks start_num = int(task_info_split[0]) count_num = int(task_info_split[1]) ans_num = start_num + count_num # Ignore invalid task options if ans_num > 9: task_str = 'INVALID' warn('A4: Computed answer > 9') task_answer_ref = np.array([str(ans_num)]) elif task_str == 'A5': # QA task num_list = map(int, list(task_info_split[0])) probe_num = int(task_info_split[1]) if has_P: task_answer_ref = np.array([str(num_list[probe_num - 1])]) elif has_K: task_answer_ref = np.array([str(num_list.index(probe_num) + 1)]) else: task_str = 'INVALID' warn('A5: No valid P/K for QA task') elif task_str == 'A6': from sets import Set # RVC task if len(task_info_split) % 2: match_list = None for i in range(len(task_info_split) / 2): list1 = np.array(list(task_info_split[i * 2])) list2 = np.array(list(task_info_split[i * 2 + 1])) if match_list is None: match_list = [ Set(np.where(list1 == item)[0]) for item in list2 ] else: # TODO: Check for inconsistencies across pairs if len(list2) != len(match_list): warn('A6: Inconsistent RVC ref answer lengths.') task_str = 'INVALID' else: match_list = [ match_list[j] & Set(np.where(list1 == list2[j])[0]) for j in range(len(match_list)) ] list1 = np.array(list(task_info_split[-1])) task_answer_ref = np.array( [list1[list(set_list)[0]] for set_list in match_list]) else: task_str = 'INVALID' warn('A6: Invalid RVC task. No question list given.') elif task_str == 'A7': # Raven's induction task # Induction task comes in 3 forms: changing list len, and changing # number relations, identical lists col_count = 1 induction_diff = None induction_len_change = None induction_identity = None for i in range(1, len(task_info_split)): if col_count % 3 == 0: col_count += 1 continue list1 = map(int, np.array(list(task_info_split[i - 1]))) list2 = map(int, np.array(list(task_info_split[i]))) # Handle the following cases: # 1. 
Unchanging list lengths of len 1 if len(list1) == len(list2) == 1: diff = list2[0] - list1[0] if induction_diff is None: induction_diff = diff if induction_diff != diff: warn('A7: Inconsistent change between induction items') task_str = 'INVALID' # 2. Changing list lengths, but containing identical items elif (list1[0] == list2[0]) and (len(list1) != len(list2)): len_change = len(list2) - len(list1) if induction_len_change is None: induction_len_change = len_change if induction_len_change != len_change: warn('A7: Inconsistent change between list lenghts') task_str = 'INVALID' elif (len(list1) == len(list2)) and (list1 == list2): induction_identity = True else: warn('A7: Unhandled induction task type') task_str = 'INVALID' # Handle transition to next row col_count += 1 def spaun_response_to_int(c): return int(c) if c.isdigit() else -1 list1 = map(spaun_response_to_int, list(task_info_split[-1])) if induction_diff is not None and induction_len_change is None and \ induction_identity is None: task_answer_ref = np.array(map(str, [list1[0] + induction_diff])) elif (induction_len_change is not None and induction_diff is None and induction_identity is None): task_answer_ref = np.array( map(str, [list1[0]] * (len(list1) + len_change))) elif (induction_len_change is None and induction_diff is None and induction_identity is not None): task_answer_ref = np.array(map(str, list1)) else: warn('A7: Multiple induction types encountered?') task_str = 'INVALID' # Format the task answer list (make the same length as the reference # answer list). Applies to all but learning task if task_str == 'INVALID': return task_str, np.array([0]) if task_str in ['A0', 'A1', 'A3', 'A4', 'A5', 'A6', 'A7']: task_answer = np.chararray(task_answer_ref.shape) task_answer[:] = '' task_answer_len = min(len(task_answer_ref), len(task_answer_spaun)) task_answer[:task_answer_len] = task_answer_spaun[:task_answer_len] # DEBUG # print task_data_str, task_answer, task_answer_ref else: print task_data_str if task_str in ['A0', 'A1', 'A3']: # For memory, recognition, copy drawing tasks, check recall accuracy # per item return ('_'.join([task_str, str(len(task_answer_ref))]), map(int, task_answer == task_answer_ref)) if task_str in ['A4', 'A5', 'A6', 'A7']: # For other non-learning tasks, check accuracy as wholesale correct / # incorrect if task_answer[0] == '-': return (None, None) return ('_'.join([task_str, str(len(task_answer_ref))]), [int(np.all(task_answer == task_answer_ref))])
def read_hyperion_config(file_path): """ Parses hyperion config file. """ with open(file_path) as hyperion_config_json: config = commentjson.load(hyperion_config_json) leds = [] x_coords = [] y_coords = [] for led in config.get('leds', []): hscan = led['hscan'] vscan = led['vscan'] hmin = hscan['minimum'] hmax = hscan['maximum'] vmin = vscan['minimum'] vmax = vscan['maximum'] h_center = round(((hmin + hmax) / 2) * 100, 2) v_center = round(((vmin + vmax) / 2) * 100, 2) x_coords.append(h_center) y_coords.append(v_center) leds.append({'x': h_center, 'y': v_center}) xcounts = [] left = None right = None for x in Set(x_coords): xcounts.append({'x': x, 'count': x_coords.count(x)}) if len(dict( (xcount['count'], xcount) for xcount in xcounts).values()) > 1: # Position might not be minimum for TV setups xcounts.sort(key=operator.itemgetter('count')) right = xcounts[len(xcounts) - 2] left = xcounts[len(xcounts) - 1] else: # Position should be minimum for matrix setups xcounts.sort(key=operator.itemgetter('x')) right = xcounts[len(xcounts) - 1] left = xcounts[0] if right['x'] < left['x']: left, right = right, left ycounts = [] top = None bottom = None for y in Set(y_coords): ycounts.append({'y': y, 'count': y_coords.count(y)}) if len(dict( (ycount['count'], ycount) for ycount in ycounts).values()) > 1: # Position might not be minimum for TV setups ycounts.sort(key=operator.itemgetter('count')) bottom = ycounts[len(ycounts) - 2] top = ycounts[len(ycounts) - 1] else: # Position should be minimum for matrix setups ycounts.sort(key=operator.itemgetter('y')) bottom = ycounts[len(ycounts) - 1] top = ycounts[0] if bottom['y'] < top['y']: top, bottom = bottom, top leds_left = [] leds_right = [] leds_top = [] leds_bottom = [] for i, led in enumerate(leds): x = led['x'] y = led['y'] if x == left['x']: leds_left.append(i) elif x == right['x']: leds_right.append(i) elif y == top['y']: leds_top.append(i) elif y == bottom['y']: leds_bottom.append(i) # Sort the lists leds_top.sort(key=lambda i: leds[i]['x'], reverse=False) leds_right.sort(key=lambda i: leds[i]['y'], reverse=False) leds_bottom.sort(key=lambda i: leds[i]['x'], reverse=True) leds_left.sort(key=lambda i: leds[i]['y'], reverse=True) # Not the lists run like this: # >>>>>>> TOP >>>>>>> # ^ v # ^ v # LEFT RIGHT # ^ v # ^ v # <<<<< BOTTOM <<<<<< # print 'leds_top: {}'.format(leds_top) # print 'leds_right: {}'.format(leds_right) # print 'leds_bottom: {}'.format(leds_bottom) # print 'leds_left: {}'.format(leds_left) return (leds, leds_top, leds_right, leds_bottom, leds_left)
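# Minimal example of the JSON shape read_hyperion_config() expects (per-LED
# hscan/vscan scan fractions); the values below are invented for a 2-LED strip:
#   {
#     "leds": [
#       {"hscan": {"minimum": 0.0, "maximum": 0.1},
#        "vscan": {"minimum": 0.0, "maximum": 0.1}},
#       {"hscan": {"minimum": 0.9, "maximum": 1.0},
#        "vscan": {"minimum": 0.0, "maximum": 0.1}}
#     ]
#   }
#   leds, leds_top, leds_right, leds_bottom, leds_left = \
#       read_hyperion_config('hyperion.config.json')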
effectivenessMap = {
    -2: 0.51,
    -1: 0.714,
     0: 1,
     1: 1.4
}

attributeNum = Set([
    "height", "weight", "number", "maxcp", "attack", "defense", "stamina",
    "damage", "energy", "energy gain", "dps", "eps", "cooldown",
    "activation", "bars"
])

def parsePokemon(attributes, poke):
    global pokemon
    current = {}
    delta = 0
    for i in range(0, len(attributes)):
        attr = attributes[i]
def get_routes(): routes = None valid_trips = None n = 1 try: n = int(request.args.get('next', 1)) except ValueError: return jsonify({ '404' : 'Cannot parse \'next\' parameter'}), 404 if len(request.args.keys()) > 0: # filter routes by the provided URL parameters lat1 = request.args.get('lat1', 999) lon1 = request.args.get('lon1', 999) lat2 = request.args.get('lat2', 999) lon2 = request.args.get('lon2', 999) if lat1 == 999 or lon1 == 999 or lat2 == 999 or lon2 == 999: # the parameters provided cannot be used to filter, so return error return jsonify({ '404' : 'Bad URL Parameters'}), 404 else: stop_times = [] start = decode(request.args.get('start', '')) stop = decode(request.args.get('stop', '')) if len(stop) > 0 and len(start) == 0: # the parameters provided cannot be used to filter, so return error return jsonify({ '404' : 'Cannot have end time without start time'}), 404 elif len(start) == 0 and len(stop) == 0: # filter by latitude and longitude only stop_times = models.StopTime.query.filter(models.StopTime.stop_lon >= lon1, models.StopTime.stop_lon <= lon2, models.StopTime.stop_lat >= lat1, models.StopTime.stop_lat <= lat2) else: start_time = None stop_time = None try: start_time = gtfs_parser.datetime_from_string(start) if len(stop) > 0: stop_time = gtfs_parser.datetime_from_string(stop) except: return jsonify({ '404' : 'Cannot parse time'}), 404 if not stop_time is None: # filter within a range of time stop_times = models.StopTime.query.filter(models.StopTime.stop_lon >= lon1, models.StopTime.stop_lon <= lon2, models.StopTime.stop_lat >= lat1, models.StopTime.stop_lat <= lat2, models.StopTime.arrival_time >= start_time, models.StopTime.departure_time <= stop_time) else: # filter from initial time only stop_times = models.StopTime.query.filter(models.StopTime.stop_lon >= lon1, models.StopTime.stop_lon <= lon2, models.StopTime.stop_lat >= lat1, models.StopTime.stop_lat <= lat2, models.StopTime.arrival_time >= start_time) stop_times = array_from_query(stop_times) stop_times.sort(key = lambda st: st.arrival_time, reverse = False) trips = [] for stop_time in stop_times: trips.append(stop_time.trip) trips = unique_array(trips) filtered_routes = Set() for trip in trips: filtered_routes.add(trip.route) routes = filtered_routes valid_trips = trips else: # otherwise, no URL parameters are provided, so return all routes routes = models.Route.query.all() return jsonify({ 'routes' : [r.serialize(valid_trips, n) for r in routes] })
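# Hypothetical requests against the handler above (the parameter names are the
# ones it reads; the '/routes' path and host are assumptions, since the route
# decorator is not shown in this excerpt):
#   GET /routes                       -> all routes, serialized
#   GET /routes?lat1=47.5&lon1=-122.4&lat2=47.7&lon2=-122.2&next=3
#       -> routes with stops inside the bounding box, next 3 trips each
#   'start' and 'stop' are URL-encoded time strings parsed by gtfs_parser.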
def initialize(self, opt): BaseModel.initialize(self, opt) if opt.resize_or_crop != 'none' or not opt.isTrain: # when training at full res this causes OOM torch.backends.cudnn.benchmark = True self.isTrain = opt.isTrain self.use_features = opt.instance_feat or opt.label_feat self.gen_features = self.use_features and not self.opt.load_features input_nc = opt.label_nc if opt.label_nc != 0 else opt.input_nc ##### define networks # Generator network netG_input_nc = input_nc + opt.otherInfo_nc if not opt.no_instance: netG_input_nc += 1 if self.use_features: netG_input_nc += opt.feat_num self.netG = networks.define_G(netG_input_nc, opt.output_nc, opt.ngf, opt.netG, opt.n_downsample_global, opt.n_blocks_global, opt.n_local_enhancers, opt.n_blocks_local, opt.norm, gpu_ids=self.gpu_ids) # Discriminator network if self.isTrain: use_sigmoid = opt.no_lsgan netD_input_nc = input_nc + opt.output_nc if not opt.no_instance: netD_input_nc += 1 self.netD = networks.define_D(netD_input_nc, opt.ndf, opt.n_layers_D, opt.norm, use_sigmoid, opt.num_D, not opt.no_ganFeat_loss, gpu_ids=self.gpu_ids) ### Encoder network if self.gen_features: self.netE = networks.define_G(opt.output_nc, opt.feat_num, opt.nef, 'encoder', opt.n_downsample_E, norm=opt.norm, gpu_ids=self.gpu_ids) if self.opt.verbose: print('---------- Networks initialized -------------') # Preprocessor network self.netP = networks.define_P(opt.otherInfoTotalSize, opt.otherInfo_nc) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") self.netP.to(device) # load networks if not self.isTrain or opt.continue_train or opt.load_pretrain: pretrained_path = '' if not self.isTrain else opt.load_pretrain self.load_network(self.netG, 'G', opt.which_epoch, pretrained_path) self.load_network(self.netP, 'P', opt.which_epoch, pretrained_path) if self.isTrain: self.load_network(self.netD, 'D', opt.which_epoch, pretrained_path) if self.gen_features: self.load_network(self.netE, 'E', opt.which_epoch, pretrained_path) # set loss functions and optimizers if self.isTrain: if opt.pool_size > 0 and (len(self.gpu_ids)) > 1: raise NotImplementedError( "Fake Pool Not Implemented for MultiGPU") self.fake_pool = ImagePool(opt.pool_size) self.old_lr = opt.lr # define loss functions self.loss_filter = self.init_loss_filter(not opt.no_ganFeat_loss, not opt.no_vgg_loss, not opt.no_smooth_loss, not opt.no_nonzero_loss) self.criterionGAN = networks.GANLoss(use_lsgan=not opt.no_lsgan, tensor=self.Tensor) self.criterionFeat = torch.nn.L1Loss() if not opt.no_vgg_loss: self.criterionVGG = networks.VGGLoss(self.gpu_ids) if not opt.no_smooth_loss: self.criterionSmooth = networks.SmoothLoss(self.gpu_ids) if not opt.no_nonzero_loss: self.criterionNonzero = networks.NonzeroLoss(self.gpu_ids) # Names so we can breakout loss self.loss_names = self.loss_filter('G_GAN', 'G_GAN_Feat', 'G_VGG', 'G_smooth', 'G_nonzero', 'D_real', 'D_fake') # initialize optimizers # optimizer G if opt.niter_fix_global > 0: import sys if sys.version_info >= (3, 0): finetune_list = set() else: from sets import Set finetune_list = Set() params_dict = dict(self.netG.named_parameters()) params = [] for key, value in params_dict.items(): if key.startswith('model' + str(opt.n_local_enhancers)): params += [value] finetune_list.add(key.split('.')[0]) print( '------------- Only training the local enhancer network (for %d epochs) ------------' % opt.niter_fix_global) print('The layers that are finetuned are ', sorted(finetune_list)) else: params = list(self.netG.parameters()) if self.gen_features: params += 
list(self.netE.parameters()) self.optimizer_G = torch.optim.Adam(params, lr=opt.lr, betas=(opt.beta1, 0.999)) # optimizer D params = list(self.netD.parameters()) self.optimizer_D = torch.optim.Adam(params, lr=opt.lr, betas=(opt.beta1, 0.999))
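# The version guard in initialize() above is the standard bridge between Python
# 2's deprecated sets.Set and the builtin set; a reusable sketch of that idiom:
import sys

if sys.version_info >= (3, 0):
    Set = set                # the sets module was removed in Python 3
else:
    from sets import Set     # Python 2 fallback (deprecated since 2.6)

finetune_list = Set()
finetune_list.add('model1')  # behaves identically under either binding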