Python Features.extract示例

class WafEnv_v0(gym.Env):
    """Gym environment in which an agent mutates an XSS sample to evade a WAF.

    Every step applies the manipulation named by ``ACTION_LOOKUP[action]`` to
    the current sample and passes the result to the WAF checker.  When
    ``check_xss`` reports nothing, the agent receives a reward of 10 and the
    episode ends; otherwise the reward is 0 and the episode continues.
    """

    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self):
        # One discrete action per entry of ACTION_LOOKUP.
        self.action_space = spaces.Discrete(len(ACTION_LOOKUP))

        # Sample currently being processed.
        self.current_sample = ""

        self.features_extra = Features()
        self.waf_checker = Waf_Check()
        # Rewrites the current sample according to the chosen action so that
        # it may slip past the WAF.
        self.xss_manipulatorer = Xss_Manipulator()

        self._reset()

    def _step(self, action):
        """Apply ``action`` to the current sample.

        Returns the tuple ``(observation, reward, done, info)`` where
        ``observation`` is the feature extraction of the mutated sample and
        ``info`` is always an empty dict.
        """
        mutated = self.xss_manipulatorer.modify(
            self.current_sample, ACTION_LOOKUP[action])
        self.current_sample = mutated

        # A falsy result from check_xss means the WAF did not flag the sample.
        bypassed = not self.waf_checker.check_xss(mutated)
        if bypassed:
            print("Good!!!!!!!avoid waf:%s" % mutated)

        # NOTE(review): ``observation_space`` is overwritten with the extracted
        # features on every step instead of holding a gym Space description;
        # kept as-is because external code may read it - confirm intent.
        observation = self.features_extra.extract(mutated)
        self.observation_space = observation

        reward = 10 if bypassed else 0
        return observation, reward, bypassed, {}

    def _reset(self):
        """Pick a random training sample and return its extracted features."""
        chosen = random.choice(samples_train)
        self.current_sample = chosen
        print("reset current_sample=" + chosen)

        observation = self.features_extra.extract(chosen)
        self.observation_space = observation
        return observation

    def render(self, mode='human', close=False):
        """Rendering is not implemented; always returns ``None``."""
        return None

示例#2

显示文件

def predict():
    """Classify the e-mail submitted in a POST form and render the result page.

    If the form contains a ``train`` field, ``train_nb_spam()`` is run before
    the prediction.  For any request method other than POST the function
    returns ``None``.
    """
    detector = Spam_Detect()
    extractor = Features(vocabulary_file)

    if request.method != 'POST':
        # Same outcome as falling off the end of the function.
        return None

    if 'train' in request.form:
        print('Predict and Train')
        train_nb_spam()

    # The extractor is handed a one-element list holding the e-mail text.
    vectors = extractor.extract([request.form['email']])
    verdict = detector.detect(vectors)
    return render_template('result.html', prediction=verdict)

示例#3

显示文件

文件： model.py 项目： awav/carnd-p5

class CarModel():
    """Classifier wrapper pairing a ``Features`` extractor with an SVM or XGBoost model.

    Typical flow as visible in this class: ``prepare(data)`` extracts and
    normalises features, ``fit(data)`` splits them and trains the model, and
    ``predict(im)`` classifies a single image.  ``save``/``load`` pickle the
    whole instance state.
    """

    def __init__(self, mode='svm'):
        """Create an untrained model.

        Args:
            mode: ``'svm'`` (LinearSVC) or ``'xgboost'`` (XGBClassifier).

        Raises:
            ValueError: if ``mode`` is neither of the two supported values.
        """
        if mode not in ('svm', 'xgboost'):
            raise ValueError('Unknown mode for CarModel')
        self._f = Features()
        self._model = None
        self._mode = mode
        self.input_shape = None

    def prepare(self, data, mode='standard'):
        """Extract, scale and attach features to ``data``.

        Features are extracted from ``data.x_orig``, the extractor's scaler is
        fitted on them (``mode`` is forwarded to ``fit_scaler``), and the
        normalised result is stored through ``data.put_features``.  The
        dataset's ``colormap`` is remembered for later use in ``predict``.

        Returns:
            The same ``data`` object.
        """
        self.input_shape = data.x_orig[0].shape
        features = self._f.extract(data.x_orig)
        self._f.fit_scaler(features, mode=mode)
        x = self._f.normalize(features)
        data.put_features(x)
        self._colormap = data.colormap
        return data

    def fit(self, data, random_state=11, show=False):
        """Split ``data.x`` / ``data.y_orig`` and train the model.

        Args:
            data: dataset whose ``x`` holds prepared features and whose
                ``y_orig`` holds the labels.
            random_state: seed used for both the train/test split and the
                underlying estimator.
            show: forwarded to ``_train``; enables the feature-importance plot
                in xgboost mode.

        Raises:
            ValueError: if ``data.x`` is ``None``.
        """
        if data.x is None:
            raise ValueError('Dataset does not have input values')
        # The seed must be forwarded explicitly; otherwise both helpers fall
        # back to their own defaults and the caller's value has no effect.
        train, test = self._split_data(
            data.x, data.y_orig, random_state=random_state)
        self._train(train, test, random_state=random_state, show=show)

    def predict(self, im, show=False):
        """Classify a single image and return its predicted label.

        The image is converted from RGB to the colormap recorded by
        ``prepare`` and wrapped in a one-element array before feature
        extraction, so a trained model and a recorded colormap (from
        ``prepare`` or ``load``) are required.
        """
        f = self._f
        im = common.cvt_color(im, color=self._colormap, src='RGB')
        features = f.extract(np.array([im]), show=show)
        pred = self._model.predict(f.normalize(features))
        return pred[0]

    def _split_data(self, x, y, test_size=0.1, random_state=11):
        """Return ``((x_train, y_train), (x_test, y_test))`` from ``train_test_split``."""
        xtr, xt, ytr, yt = train_test_split(
            x, y, test_size=test_size, random_state=random_state)
        return (xtr, ytr), (xt, yt)

    def _one_hot_encode(self, y):
        """Return an ``int32`` one-hot matrix with one row per entry of ``y``.

        The number of columns equals the number of distinct values in ``y``
        and each value is used directly as a column index, so labels are
        assumed to be the integers ``0 .. k-1`` - TODO confirm with callers.
        """
        width = np.unique(y).shape[0]
        height = y.shape[0]
        one = np.zeros((height, width), dtype=np.int32)
        one[np.arange(height), y] = 1
        return one

    def _train(self, train, test, random_state=11, show=False):
        """Fit the estimator selected by ``self._mode`` and print test metrics.

        Args:
            train: ``(x, y)`` pair used for fitting.
            test: ``(x, y)`` pair used for the printed accuracy (and, in
                xgboost mode, AUC).
            random_state: seed handed to the estimator.
            show: in xgboost mode, plot and print feature importances.
        """
        x, y = train
        xtest, ytest = test
        is_xgboost = self._mode == 'xgboost'

        if is_xgboost:
            self._model = XGBClassifier(
                learning_rate=0.1,
                n_estimators=150,
                max_depth=5,
                min_child_weight=1,
                gamma=0,
                subsample=0.8,
                colsample_bytree=0.8,
                objective='binary:logistic',
                nthread=4,
                scale_pos_weight=1,
                seed=random_state)
            self._model.fit(x, y, eval_metric='auc')
        else:
            self._model = LinearSVC(random_state=random_state)
            self._model.fit(x, y)

        pred = self._model.predict(xtest)
        acc_msg = "Test accuracy: {0:.05f}"
        print(acc_msg.format(metrics.accuracy_score(ytest, pred)))

        if not is_xgboost:
            return

        pred_prob = self._model.predict_proba(xtest)
        ytest_hot = self._one_hot_encode(ytest)
        auc_msg = "AUC score: {0:.05f}"
        print(auc_msg.format(metrics.roc_auc_score(ytest_hot, pred_prob)))

        if show:
            # NOTE(review): recent xgboost releases expose this accessor as
            # ``get_booster()``; confirm against the installed version.
            importance = pd.Series(
                self._model.booster().get_fscore()).sort_values(ascending=False)
            importance.plot(kind='bar', title='Feature Importance')
            plt.show()
            print(importance[:10])

    def load(self, filename='data/model.p'):
        """Replace this instance's state with the pickled state in ``filename``."""
        # SECURITY: pickle.load can execute arbitrary code; only load files
        # that come from a trusted source.
        with open(filename, 'rb') as fd:
            self.__dict__ = pickle.load(fd)

    def save(self, filename='data/model.p'):
        """Pickle this instance's ``__dict__`` to ``filename``."""
        with open(filename, 'wb') as fd:
            pickle.dump(self.__dict__, fd)

示例#4

显示文件

# Build the symbol table: every character seen in either string of a pair gets
# the next free integer id in Sigma, and Sigma_inv records the reverse mapping.
for upper, lower in data:
    print("%s %s" % (upper, lower))
    for word in (upper, lower):
        for c in set(word):
            if c not in Sigma:
                # Register the reverse entry first, while len(Sigma) is still
                # the id about to be assigned to c.
                Sigma_inv[len(Sigma)] = c
                Sigma[c] = len(Sigma)

features = Features(Sigma, Sigma_inv)
# Only the first pair is used: the loop stops after one extraction.
for upper, lower in data:
    features.extract(upper, URC=0, ULC=0, create=True)
    break

print(len(features.features))

print(features.num_extracted)

for k, v in features.features.items():
    print("%s %s" % (k, v))

示例#5

显示文件

文件： collect.py 项目： troq/flappy-bird-player

im.show()
data = []

# Raw input events are read from this evdev device node.
device = InputDevice('/dev/input/event5')

# Number of upcoming mouse-down events to ignore after a failed extraction.
skip = 0

m = 10  # number of samples to collect
i = 0   # number of samples collected so far

X = np.zeros([m, 2], dtype='int16')
y = np.zeros(m, dtype='bool')

for event in device.read_loop():
    if i == m:
        break

    # A mouse-down is a key event (type EV_KEY) whose code is the mouse
    # button and whose value is 1 (press).  Python spells the conjunction
    # ``and``; ``&&`` is a syntax error.
    is_mousedown = (event.type == ecodes.EV_KEY
                    and event.code == ecodes.BTN_MOUSE
                    and event.value == 1)
    if not is_mousedown:
        continue

    if skip > 0:
        # Still cooling down after a failed extraction: consume this click.
        skip -= 1
        continue

    pb = pb.get_from_drawable(w, cm, 58, 140, 0, 0, *sz)
    im = Image(pb.get_pixels_array())  # creates simplecv image from pixbuf
    click = True
    f = Features(im, click)
    extracted = f.extract()
    if not extracted:
        # NOTE(review): previously the counter was decremented only inside
        # the branch guarded by ``skip <= 0``, so it could never return to
        # zero and no later click was processed.  Confirm that 4 is the
        # intended number of clicks to ignore.
        skip = 4
        continue

    X[i] = [f.x_disp, f.y_disp]
    y[i] = f.click
    i += 1

示例#6

显示文件

    def check_spam(self, featurevectors):
        """Return 1.0 if ``featurevectors`` is classified as spam, else 0.0.

        The single sample is wrapped in a list for ``predict_proba``; the
        last column of the first row is compared with the module-level
        ``local_model_threshold``.
        """
        # predict_proba yields a row such as [[0.96085352 0.03914648]]; the
        # last entry is taken as the spam probability.
        spam_probability = self.clf.predict_proba([featurevectors])[0, -1]
        # At or above the threshold counts as spam.
        is_spam = spam_probability >= local_model_threshold
        return float(is_spam)


if __name__ == '__main__':

    # Runs train_mlp_spam() before anything else; presumably this produces
    # the model that Spam_Check uses - confirm against its definition.
    train_mlp_spam()

    spam_Check = Spam_Check()
    features_extract = Features(vocabulary_file)
    # Smoke run on one hard-coded message; the returned label is discarded.
    featurevectors = features_extract.extract(
        "thank you ,your email address was obtained from a purchased list ,"
        "reference # 2020 mid = 3300 . if you wish to unsubscribe")
    spam_Check.check_spam(featurevectors)

    samples_train, samples_test = load_all_spam()

    # sum counts every test sample, success counts those labelled 1.0.
    # NOTE(review): ``sum`` shadows the builtin of the same name for the rest
    # of the module.
    sum = 0
    success = 0

    for sample in samples_test:
        sum += 1
        featurevectors = features_extract.extract(sample)
        label = spam_Check.check_spam(featurevectors)
        print label
        if label == 1.0:
            success += 1

示例#7

显示文件

        for c in list(one):
            if c not in Sigma:
                Sigma[c] = len(Sigma)
        for c in list(two):
            if c not in Sigma:
                Sigma[c] = len(Sigma)

    Sigma_inv = {}
    for x, y in Sigma.items():
        Sigma_inv[y] = x

    # features
    features = Features(Sigma, Sigma_inv)
    for upper, lower in train_str:
        print upper
        features.extract(upper, create=True)
    for upper, lower, in test_str:
        features.extract(upper, create=False)
    """
    seen = set([])
    for i in xrange(len("jason")+1):
        for j in xrange(len(Sigma)):
            dell = features.extracted[0][i][j][0][1]

<<<<<<< HEAD
# parse the arguments
args = p.parse_args()


# read in the data
size = 320