示例#1
0
def generateFeatureDictionary(addresskeys, keyAddressCounts):
    """Build feature columns (one per address key) plus a ``'class'`` column.

    For every entry of ``keyAddressCounts`` exactly one value is appended to
    each column named in ``addresskeys``: the value stored for that address
    in the entry, or ``0`` when the entry has no such address. The label
    appended to the ``'class'`` column is ``charName`` applied to the part
    of the entry's name that precedes the first underscore.

    Args:
        addresskeys: iterable of address keys; each becomes a column.
        keyAddressCounts: mapping of entry name -> mapping of address -> value.

    Returns:
        A ``defaultdict(list)`` mapping column name to its list of values.
    """
    logger.info("generating feature list")
    columns = defaultdict(list)

    for sample_name in keyAddressCounts:
        per_address = keyAddressCounts[sample_name]
        for address in addresskeys:
            # Missing addresses contribute an explicit zero so that every
            # column ends up with one value per entry.
            value = per_address[address] if address in per_address else 0
            columns[address].append(value)

        label_source = sample_name.split('_')[0]
        columns['class'].append(charName(label_source))

    logger.debug("featurelist length %d", len(columns))
    return columns
 def preprocess_outlier(self, data_numbers):
     """Remove outliers from ``ground_truth`` separately for each class.

     ``self.df`` is grouped by its ``'class'`` column. For every value in
     ``data_numbers`` the matching group's ``'ground_truth'`` column is
     passed to ``outliers(...).remove_outliers()``, the result is wrapped in
     a DataFrame, tagged with ``charName(value)`` in a ``'class'`` column,
     and all per-class frames are concatenated row-wise.

     Args:
         data_numbers: iterable of class values present in ``self.df['class']``.

     Returns:
         A single DataFrame holding the cleaned rows of every requested class.

     Raises:
         KeyError: if a value has no matching group in ``self.df``.
         ValueError: if ``data_numbers`` is empty (nothing to concatenate).
     """
     by_class = self.df.groupby('class')
     # Collect results in a local list: the previous code appended to an
     # undefined/global ``df_list``, which either failed with NameError or
     # leaked rows between calls.
     cleaned_frames = []
     for value in data_numbers:
         group = by_class.get_group(value)
         remover = outliers(group['ground_truth'])
         cleaned = pd.DataFrame(remover.remove_outliers())
         cleaned['class'] = charName(value)
         cleaned_frames.append(cleaned)
     # Debug preview of the source frame (previously referenced a bare
     # ``df`` that is not defined in this method).
     print(self.df.head())
     return pd.concat(cleaned_frames, axis=0)
# Script section: for each word, look up the guess list of every character
# and record at which position the true character appears in that list.
# NOTE(review): `list_of_guesses`, `words` and `charName` are defined outside
# this section.

# Disabled: guesses built from the training distribution.
#training_guesses = list_of_guesses(train_distribution)
test_distribution = 'distribution/distribution_test.csv'
test_guesses = list_of_guesses(test_distribution)

#print(training_guesses)
#print(test_guesses)
# Opened for writing and not closed within this section.
# NOTE(review): confirm it is closed further down.
file_out = open('results_modeled_with_confidence_avg.csv', 'w')
word_given = ""
for p_word in words:
    p_word = p_word.lower()
    print("***************************")
    print(p_word)
    # keys[i] is the entry of test_guesses for the i-th character of the word
    # (presumably an ordered list of guessed characters -- TODO confirm).
    keys = []
    for a_char in p_word:
        print(a_char)
        keys.append(test_guesses[charName(a_char)])  #if you turn on append

    print(keys)
    # x[i] is the position of the true character within its own guess list.
    # NOTE(review): if keys[i] is a list, .index raises ValueError when the
    # true character is absent from it.
    x = [keys[i].index(charName(p_word[i])) for i in range(len(p_word))]
    print(x)
    ret = 0

    # Disabled alternative ("dfs" variant): accumulates x[i] weighted by the
    # product of the lengths of all later guess lists.
    #file_out = open('results_modeled_with_confidence_dfs.csv','w')
    # for i in range(len(p_word)):
    # 	z = 1
    # 	for j in range(i+1,len(p_word)):
    # 		z = z * len(keys[j])
    # 	ret += x[i] * z

    # Disabled alternative ("pow" variant): (worst position + 1) raised to the
    # word length.
    #file_out = open('results_modeled_with_confidence_pow.csv','w')
    #ret = pow(max(x)+1,len(p_word))