# imports used below; assumed to sit at the top of the module alongside the
# definitions of Pipeline, getjsonData, getFeatureValue, checksimiliarData
# and getWeightage, which are not shown in this section
import subprocess

import numpy as np


def calculateWeightage():
    global totalSample
    global jsonData
    global Featureset

    csv_filename = "WebcredNormalized.csv"
    with open(csv_filename, 'r') as f:
        data = f.readlines()
    pipe = Pipeline()
    # get json data
    jsonData = pipe.converttojson(data)
    totalSets = 10  # sets of possible weightages
    weightage = []
    # number of data rows, excluding the header line
    totalSample = int(
        subprocess.check_output(['wc', '-l', csv_filename]).split(' ')[0]) - 1
    filterKeys = ['url', 'wot', 'cookie', 'redirected']
    FeaturesName = list(set(jsonData[0].keys()) - set(filterKeys))
    Featureset = []
    alexaScoreSet = []
    for i in range(totalSets):
        count = 0
        # select sample sets; Featureset and alexaScoreSet accumulate across
        # sets, so each getWeightage call sees every sample gathered so far
        while True:
            sample = getjsonData()
            featurevalue, alexaScore, wotScore = getFeatureValue(
                [sample], FeaturesName)
            # keep the row only if it passes the similarity/duplicate check
            if checksimiliarData(featurevalue[0]):
                Featureset.append(featurevalue[0])
                alexaScoreSet.append(alexaScore)
                count += 1
            if count == len(FeaturesName) - 1:
                # constraint row: all-ones features with target 0.9, so the
                # fitted weightages sum to 0.9 and the misc. genre keeps the
                # remaining 0.1 (sum of all weightages == 1)
                Featureset.append([1] * len(FeaturesName))
                alexaScoreSet.append([0.9])
                break
        # get the weightage of each individual feature for this set
        weightage.append(getWeightage(Featureset, alexaScoreSet))
        print 'getting set', i, 'of weightages'
    finalWeightage = np.mean(weightage, axis=0).tolist()
    total = sum(finalWeightage)
    print total
    print finalWeightage
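
# A minimal sketch of what getWeightage could compute, assuming it fits one
# weight per feature to the Alexa scores by ordinary least squares; the real
# getWeightage is defined elsewhere in this module and may differ. The name
# getWeightageSketch and the lstsq approach are illustrative assumptions.
def getWeightageSketch(featureMatrix, scoreVector):
    A = np.asarray(featureMatrix, dtype=float)  # one row per sample, one column per feature
    b = np.asarray(scoreVector, dtype=float).ravel()  # flatten [[score], ...] to a vector
    # solve A * w ~= b in the least-squares sense; w[j] is feature j's weightage
    w, residuals, rank, singular_values = np.linalg.lstsq(A, b)
    return w.tolist()
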
    print weightage, alexaSimilarityScore

elif action == 'bn':
    global totalSample
    global jsonData
    global Featureset

    Featureset = []
    alexaScoreSet = []
    wotScoreSet = []
    csv_filename = "WebcredNormalized.csv"
    with open(csv_filename, 'r') as f:
        data = f.readlines()
    pipe = Pipeline()
    # get json data
    jsonData = pipe.converttojson(data)
    # number of data rows, excluding the header line
    totalSample = int(
        subprocess.check_output(['wc', '-l', csv_filename]).split(' ')[0]) - 1
    filterKeys = ['url', 'wot', 'cookie', 'redirected']
    FeaturesName = list(set(jsonData[0].keys()) - set(filterKeys))
    count = 0
    tried = 0
    # build the matrix with 1000 samples
    while True:
        tried += 1
        try:
            sample = getjsonData()
            featurevalue, alexaScore, wotScore = getFeatureValue(
                [sample], FeaturesName)
            if checksimiliarData(featurevalue[0]):