from collections import defaultdict
import logging

logger = logging.getLogger(__name__)


def generateFeatureDictionary(addresskeys, keyAddressCounts):
    """Build a feature dictionary: one count column per address key, plus a 'class' label column."""
    logger.info("generating feature list")
    featurelist = defaultdict(list)
    for keypressed in keyAddressCounts:
        for key in addresskeys:
            if key in keyAddressCounts[keypressed]:
                featurelist[key].append(keyAddressCounts[keypressed][key])
            else:
                featurelist[key].append(0)
        # label the row with the character recovered from the keypress id
        featurelist['class'].append(charName(keypressed.split('_')[0]))
    logger.debug("featurelist length %d", len(featurelist))
    return featurelist
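# Illustrative usage sketch (not from the original source): the returned dict has
# one list per address key plus a 'class' column, so it loads straight into a
# pandas DataFrame for training. The sample addresses and keypress ids below are
# made up, and charName is assumed to be the repo's keypress-to-label helper.
import pandas as pd

sample_counts = {
    'a_press1': {'0x1f37': 3},        # pressing 'a' touched address 0x1f37 three times
    'b_press1': {'0x1f40': 2},        # pressing 'b' touched address 0x1f40 twice
}
features = generateFeatureDictionary(['0x1f37', '0x1f40'], sample_counts)
print(pd.DataFrame(features).head())  # columns: 0x1f37, 0x1f40, class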
import pandas as pd


def preprocess_outlier(self, data_numbers):
    """Remove ground-truth outliers per class, then recombine all classes into one DataFrame."""
    gb = self.df.groupby('class')
    df_list = []
    for value in data_numbers:
        data = gb.get_group(value)
        # strip outliers from this class's ground-truth values
        rmo = outliers(data['ground_truth'])
        temp_df = pd.DataFrame(rmo.remove_outliers())
        temp_df['class'] = charName(value)
        df_list.append(temp_df)
    df_all = pd.concat(df_list, axis=0)
    return df_all
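# The outliers helper used above is defined elsewhere in the repo; this is a
# minimal IQR-based sketch of a compatible implementation (an assumption, not
# necessarily the original logic).
class outliers:
    def __init__(self, series):
        self.series = pd.Series(series)

    def remove_outliers(self):
        # keep values within 1.5 * IQR of the quartile range
        q1, q3 = self.series.quantile(0.25), self.series.quantile(0.75)
        iqr = q3 - q1
        mask = self.series.between(q1 - 1.5 * iqr, q3 + 1.5 * iqr)
        return self.series[mask]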
#training_guesses = list_of_guesses(train_distribution)
test_distribution = 'distribution/distribution_test.csv'
test_guesses = list_of_guesses(test_distribution)

file_out = open('results_modeled_with_confidence_avg.csv', 'w')
word_given = ""
for p_word in words:
    p_word = p_word.lower()
    print("***************************")
    print(p_word)
    keys = []
    for a_char in p_word:
        print(a_char)
        keys.append(test_guesses[charName(a_char)])
    print(keys)
    # rank of the correct character within each position's guess list
    x = [keys[i].index(charName(p_word[i])) for i in range(len(p_word))]
    print(x)
    ret = 0
    # Alternative scoring schemes, kept for reference:
    # mixed-radix rank over the guess lists ('dfs' variant,
    # written to results_modeled_with_confidence_dfs.csv):
    #   for i in range(len(p_word)):
    #       z = 1
    #       for j in range(i + 1, len(p_word)):
    #           z = z * len(keys[j])
    #       ret += x[i] * z
    # worst-case power bound ('pow' variant,
    # written to results_modeled_with_confidence_pow.csv):
    #   ret = pow(max(x) + 1, len(p_word))
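# Runnable version of the commented-out mixed-radix ('dfs') scheme above, as a
# sketch: each position's guess list acts as a digit, so the result is the
# word's overall rank when candidates are enumerated in guess order. The helper
# name word_rank is illustrative, not from the source.
def word_rank(x, keys):
    ret = 0
    for i in range(len(x)):
        z = 1
        for j in range(i + 1, len(x)):
            z *= len(keys[j])  # weight of position i in the mixed radix
        ret += x[i] * z
    return ret

# e.g. per-position ranks [1, 0, 2] over three 26-entry guess lists:
print(word_rank([1, 0, 2], [list(range(26))] * 3))  # 1*26*26 + 0*26 + 2 = 678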