示例#1
0
def calculateWeightage():
    global totalSample
    global jsonData
    global Featureset

    csv_filename = "WebcredNormalized.csv"

    f = open(csv_filename, 'r')
    data = f.readlines()
    pipe = Pipeline()
    # get json data
    jsonData = pipe.converttojson(data)
    totalSets = 10
    # sets of possible weightages
    weightage = []
    totalSample = int(
        subprocess.check_output(['wc', '-l', csv_filename]).split(' ')[0]) - 1
    filterKeys = ['url', 'wot', 'cookie', 'redirected']
    FeaturesName = list((set(jsonData[0].keys()) - set(filterKeys)))

    Featureset = []
    alexaScoreSet = []

    for i in range(totalSets):
        count = 0
        # select sample sets
        while True:
            sample = getjsonData()
            featurevalue, alexaScore, wotScore = getFeatureValue([sample],
                                                                 FeaturesName)
            if checksimiliarData(featurevalue[0]):
                Featureset.append(featurevalue[0])
                alexaScoreSet.append(alexaScore)
                count += 1
                if count == len(FeaturesName) - 1:
                    # sum of all weightage== 1, misc. genre == 0.1
                    temp = []
                    for j in range(len(FeaturesName)):
                        temp.append(1)
                    Featureset.append(temp)
                    alexaScoreSet.append([0.9])
                    break

        # get weightage of individual feature
        weightage.append(getWeightage(Featureset, alexaScoreSet))
        print 'getting', i, 'set of weightages'

    finalWeightage = np.mean(weightage, axis=0).tolist()

    total = 0
    for i in finalWeightage:
        total += i

    print total
    print finalWeightage
示例#2
0
            print weightage, alexaSimilarityScore

        elif action == 'bn':
            global totalSample
            global jsonData
            global Featureset
            Featureset = []
            alexaScoreSet = []
            wotScoreSet = []
            csv_filename = "WebcredNormalized.csv"

            f = open(csv_filename, 'r')
            data = f.readlines()
            pipe = Pipeline()
            # get json data
            jsonData = pipe.converttojson(data)
            totalSample = int(
                subprocess.check_output(['wc', '-l', csv_filename
                                         ]).split(' ')[0]) - 1
            filterKeys = ['url', 'wot', 'cookie', 'redirected']
            FeaturesName = list((set(jsonData[0].keys()) - set(filterKeys)))
            count = 0
            tried = 0
            # building matrix wiht 1000 samples
            while True:
                tried += 1
                try:
                    sample = getjsonData()
                    featurevalue, alexaScore, wotScore = getFeatureValue(
                        [sample], FeaturesName)
                    if checksimiliarData(featurevalue[0]):