class WafEnv_v0(gym.Env):
    """Gym environment that mutates an XSS sample until the WAF check stops flagging it.

    Each step applies one action from ``ACTION_LOOKUP`` to the current sample
    through ``Xss_Manipulator.modify``. When ``Waf_Check.check_xss`` returns a
    falsy value for the modified sample, the step yields reward 10 and ends
    the episode; otherwise the reward is 0 and the episode continues.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
    }

    def __init__(self):
        self.action_space = spaces.Discrete(len(ACTION_LOOKUP))
        # Sample currently being processed.
        self.current_sample = ""
        self.features_extra = Features()
        self.waf_checker = Waf_Check()
        # Modifies the current sample according to the chosen action so that
        # it evades detection.
        self.xss_manipulatorer = Xss_Manipulator()
        self._reset()

    def _step(self, action):
        """Apply ``action`` to the current sample.

        Args:
            action: Key into ``ACTION_LOOKUP`` selecting the modification.

        Returns:
            A tuple ``(observation, reward, is_gameover, info)`` where
            ``observation`` is the feature extraction of the modified sample
            and ``info`` is always an empty dict.
        """
        r = 0
        is_gameover = False
        _action = ACTION_LOOKUP[action]
        self.current_sample = self.xss_manipulatorer.modify(
            self.current_sample, _action)

        if not self.waf_checker.check_xss(self.current_sample):
            # Reward the agent: the WAF check no longer flags the sample.
            r = 10
            is_gameover = True
            # Single-argument print() behaves the same on Python 2 and 3.
            print("Good!!!!!!!avoid waf:%s" % self.current_sample)

        # NOTE(review): this stores the extracted features in
        # `observation_space` rather than a gym Space description; kept
        # because callers may read the attribute -- confirm before changing.
        self.observation_space = self.features_extra.extract(
            self.current_sample)
        return self.observation_space, r, is_gameover, {}

    def _reset(self):
        """Pick a random training sample and return its extracted features."""
        self.current_sample = random.choice(samples_train)
        print("reset current_sample=" + self.current_sample)
        self.observation_space = self.features_extra.extract(
            self.current_sample)
        return self.observation_space

    def render(self, mode='human', close=False):
        """Rendering is not implemented; always returns ``None``."""
        return
def predict():
    """Classify the e-mail submitted in the request form and render the result page.

    On a POST request the ``email`` form field is wrapped in a one-element
    list, passed through ``Features.extract`` and then ``Spam_Detect.detect``.
    If the form also contains a ``train`` field, ``train_nb_spam()`` is called
    first.

    Returns:
        The rendered ``result.html`` template with ``prediction`` set to the
        detector output, or to ``None`` when the request was not a POST.
    """
    spam_detect = Spam_Detect()
    features_extract = Features(vocabulary_file)

    # Bind a default so a non-POST request renders the template instead of
    # failing with an unbound local variable at the return statement.
    my_prediction = None

    if request.method == 'POST':
        if 'train' in request.form:
            print('Predict and Train')
            train_nb_spam()

        email = request.form['email']
        data = [email]
        featurevectors = features_extract.extract(data)
        my_prediction = spam_detect.detect(featurevectors)

    return render_template('result.html', prediction=my_prediction)
class CarModel():
    """Classifier wrapper combining a ``Features`` extractor with an SVM or XGBoost model.

    ``mode`` selects the estimator built by ``_train``: ``'svm'`` uses
    ``LinearSVC`` and ``'xgboost'`` uses ``XGBClassifier``.
    """

    def __init__(self, mode='svm'):
        """Create an untrained model.

        Args:
            mode: ``'svm'`` or ``'xgboost'``.

        Raises:
            ValueError: If ``mode`` is neither of the supported values.
        """
        if mode not in ('svm', 'xgboost'):
            raise ValueError('Unknown mode for CarModel')
        self._f = Features()
        self._model = None
        self._mode = mode
        self.input_shape = None

    def prepare(self, data, mode='standard'):
        """Extract and normalize features for ``data`` and store them on it.

        Fits the feature scaler on the features extracted from
        ``data.x_orig``, hands the normalized features to
        ``data.put_features`` and remembers ``data.colormap`` for ``predict``.

        Args:
            data: Dataset object providing ``x_orig``, ``put_features`` and
                ``colormap``.
            mode: Scaler mode forwarded to ``Features.fit_scaler``.

        Returns:
            The same ``data`` object.
        """
        self.input_shape = data.x_orig[0].shape
        features = self._f.extract(data.x_orig)
        self._f.fit_scaler(features, mode=mode)
        x = self._f.normalize(features)
        data.put_features(x)
        self._colormap = data.colormap
        return data

    def fit(self, data, random_state=11, show=False):
        """Split ``data`` into train/test parts and train the model.

        Args:
            data: Dataset object providing ``x`` (features) and ``y_orig``.
            random_state: Seed used for both the train/test split and the
                estimator.
            show: Forwarded to ``_train``.

        Raises:
            ValueError: If ``data.x`` is ``None``.
        """
        if data.x is None:
            raise ValueError('Dataset does not have input values')
        # Forward the caller's seed; previously it was accepted but ignored,
        # so the split and the estimator always used their own defaults.
        train, test = self._split_data(data.x, data.y_orig,
                                       random_state=random_state)
        self._train(train, test, random_state=random_state, show=show)

    def predict(self, im, show=False):
        """Return the model's prediction for a single image.

        The image is converted with ``common.cvt_color`` from ``'RGB'`` to the
        colormap recorded by ``prepare``, wrapped in a one-element array,
        feature-extracted, normalized and passed to the trained model.
        """
        f = self._f
        im = common.cvt_color(im, color=self._colormap, src='RGB')
        batch = np.array([im])
        pred = self._model.predict(f.normalize(f.extract(batch, show=show)))
        return pred[0]

    def _split_data(self, x, y, test_size=0.1, random_state=11):
        """Split into ``(x_train, y_train), (x_test, y_test)``."""
        xtr, xt, ytr, yt = train_test_split(
            x, y, test_size=test_size, random_state=random_state)
        return (xtr, ytr), (xt, yt)

    def _one_hot_encode(self, y):
        """Return an int32 one-hot matrix with one row per entry of ``y``.

        The number of columns equals the number of distinct values in ``y``.
        Assumes the labels are the integers ``0..k-1`` so they can be used as
        column indices -- TODO confirm against callers.
        """
        width = np.unique(y).shape[0]
        height = y.shape[0]
        one = np.zeros((height, width), dtype=np.int32)
        one[range(height), y] = 1
        return one

    def _train(self, train, test, random_state=11, show=False):
        """Fit the estimator selected by ``self._mode`` and print test metrics.

        Args:
            train: ``(x, y)`` training pair.
            test: ``(x, y)`` evaluation pair.
            random_state: Seed handed to the estimator.
            show: When truthy and the mode is ``'xgboost'``, plot the feature
                importance and print its first ten entries.
        """
        x, y = train
        xtest, ytest = test
        is_xgboost = self._mode == 'xgboost'

        if is_xgboost:
            self._model = XGBClassifier(
                learning_rate=0.1,
                n_estimators=150,
                max_depth=5,
                min_child_weight=1,
                gamma=0,
                subsample=0.8,
                colsample_bytree=0.8,
                objective='binary:logistic',
                nthread=4,
                scale_pos_weight=1,
                seed=random_state)
            self._model.fit(x, y, eval_metric='auc')
        else:
            self._model = LinearSVC(random_state=random_state)
            self._model.fit(x, y)

        pred = self._model.predict(xtest)
        acc_msg = "Test accuracy: {0:.05f}"
        print(acc_msg.format(metrics.accuracy_score(ytest, pred)))

        if not is_xgboost:
            return

        pred_prob = self._model.predict_proba(xtest)
        ytest_hot = self._one_hot_encode(ytest)
        auc_msg = "AUC score: {0:.05f}"
        print(auc_msg.format(metrics.roc_auc_score(ytest_hot, pred_prob)))

        if show:
            # NOTE(review): `booster()` may be named differently in newer
            # xgboost releases -- confirm against the installed version.
            importance = pd.Series(
                self._model.booster().get_fscore()).sort_values(ascending=False)
            importance.plot(kind='bar', title='Feature Importance')
            plt.show()
            print(importance[:10])

    def load(self, filename='data/model.p'):
        """Replace this instance's attributes with the pickled dict in ``filename``."""
        # SECURITY: unpickling can execute arbitrary code; only load model
        # files that come from a trusted source.
        with open(filename, 'rb') as fd:
            self.__dict__ = pickle.load(fd)

    def save(self, filename='data/model.p'):
        """Pickle this instance's attribute dict to ``filename``."""
        with open(filename, 'wb') as fd:
            pickle.dump(self.__dict__, fd)
# Build the alphabet: give every character seen in either string of a pair
# the next free integer id, and keep the reverse mapping in Sigma_inv.
for upper, lower in data:
    # Single-argument print() behaves the same on Python 2 and 3.
    print("%s %s" % (upper, lower))
    for word in (upper, lower):
        for c in set(word):
            if c not in Sigma:
                Sigma_inv[len(Sigma)] = c
                Sigma[c] = len(Sigma)

features = Features(Sigma, Sigma_inv)

# Only the first pair is extracted: the loop stops after one iteration.
for upper, lower in data:
    features.extract(upper, URC=0, ULC=0, create=True)
    break

print(len(features.features))
print(features.num_extracted)
for k, v in features.features.items():
    print("%s %s" % (k, v))
im.show()
data = []  # log
device = InputDevice('/dev/input/event5')

skip = 0
m = 10  # number of samples to collect
i = 0   # index of the next free row in X / y
X = np.zeros([m, 2], dtype='int16')
y = np.zeros(m, dtype='bool')

for event in device.read_loop():
    if i == m:
        break

    # An evdev event carries its category in `type` and the button in `code`,
    # so a mouse-button press is an EV_KEY event whose code is BTN_MOUSE and
    # whose value is 1. The original compared `type` against BTN_MOUSE and
    # joined the tests with `&&`, which is not valid Python.
    is_mousedown = (event.type == ecodes.EV_KEY
                    and event.code == ecodes.BTN_MOUSE
                    and event.value == 1)

    if is_mousedown and skip <= 0:
        pb = pb.get_from_drawable(w, cm, 58, 140, 0, 0, *sz)
        im = Image(pb.get_pixels_array())  # creates simplecv image from pixbuf
        click = True
        f = Features(im, click)
        extracted = f.extract()
        if not extracted:
            # Extraction failed: ignore mouse-downs until skip has counted
            # back down to zero.
            skip = 4
        else:
            X[i] = [f.x_disp, f.y_disp]
            y[i] = f.click
            i += 1

    # Decremented on every event so a pending skip eventually expires.
    skip -= 1
def check_spam(self, featurevectors):
    """Return 1.0 if the classifier's spam probability reaches the threshold, else 0.0.

    Args:
        featurevectors: Feature vector for a single e-mail; it is wrapped in
            a one-element list before being passed to ``predict_proba``.

    Returns:
        ``float(probability >= local_model_threshold)``.
    """
    # predict_proba output looks like [[0.96085352 0.03914648]]; the last
    # column of the only row is taken as the spam probability.
    y_pred = self.clf.predict_proba([featurevectors])[0, -1]
    # A probability at or above the threshold is judged to be spam.
    label = float(y_pred >= local_model_threshold)
    return label


if __name__ == '__main__':
    train_mlp_spam()
    spam_Check = Spam_Check()
    features_extract = Features(vocabulary_file)

    # Smoke run on one hard-coded message; the result is not used.
    featurevectors = features_extract.extract(
        "thank you ,your email address was obtained from a purchased list ,"
        "reference # 2020 mid = 3300 . if you wish to unsubscribe")
    spam_Check.check_spam(featurevectors)

    samples_train, samples_test = load_all_spam()
    # NOTE(review): `sum` shadows the builtin; the name is kept because code
    # beyond this view may read it.
    sum = 0
    success = 0
    for sample in samples_test:
        sum += 1
        featurevectors = features_extract.extract(sample)
        label = spam_Check.check_spam(featurevectors)
        # Single-argument print() behaves the same on Python 2 and 3.
        print(label)
        if label == 1.0:
            success += 1
for c in list(one): if c not in Sigma: Sigma[c] = len(Sigma) for c in list(two): if c not in Sigma: Sigma[c] = len(Sigma) Sigma_inv = {} for x, y in Sigma.items(): Sigma_inv[y] = x # features features = Features(Sigma, Sigma_inv) for upper, lower in train_str: print upper features.extract(upper, create=True) for upper, lower, in test_str: features.extract(upper, create=False) """ seen = set([]) for i in xrange(len("jason")+1): for j in xrange(len(Sigma)): dell = features.extracted[0][i][j][0][1] <<<<<<< HEAD # parse the arguments args = p.parse_args() # read in the data size = 320