Example #1
 def __init__(self, numGestures, minDescriptorsPerFrame, numWords, descType,
              numPredictions, parent):
     self.numGestures = numGestures
     self.numWords = numWords
     self.minDescriptorsPerFrame = minDescriptorsPerFrame
     self.parent = parent
     self.classifier = None
     self.windowName = "Testing preview"
     self.handWindowName = "Cropped hand"
     self.binaryWindowName = "Binary frames"
     self.predictionList = [-1] * numPredictions
     self.handTracker = HandTracker(kernelSize=7,
                                    thresholdAngle=0.4,
                                    defectDistFromHull=30,
                                    parent=self)
     self.featureExtractor = FeatureExtractor(type=descType, parent=self)
     self.numSideFrames = 10
     # use integer division: np.zeros requires integral dimensions
     self.prevFrameList = np.zeros(
         (self.numSideFrames, self.parent.imHeight // self.numSideFrames,
          self.parent.imWidth // self.numSideFrames, 3), "uint8")
     self.numPrevFrames = 0
     self.predictionScoreThreshold = 0.2
     self.learningRate = 0.01
     self.numReinforce = 1
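
The constructor seeds predictionList with numPredictions sentinel values alongside a predictionScoreThreshold, which suggests the gesture label is smoothed over the last few frames. A minimal sketch of such smoothing as a majority vote; the helper below is hypothetical, not part of the snippet:

from collections import Counter

def smooth_prediction(predictionList, scoreThreshold):
    # majority vote over the recent frame predictions; -1 means "no gesture"
    votes = Counter(p for p in predictionList if p != -1)
    if not votes:
        return -1
    label, count = votes.most_common(1)[0]
    # accept the label only if it dominates enough of the window
    return label if count / len(predictionList) >= scoreThreshold else -1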
Example #2
 def __set_fast_data(img_file_path, lbl):
     payload = list()
     db_handle = MongoDB()
     feature_vector = FeatureExtractor().get_features(img_file_path)
     feature_map = dict()
     key_p = os.path.splitext(os.path.basename(img_file_path))
     key = key_p[0] + '_' + key_p[1][1:] + '_' + str(
         int(time.time() * 1000.0))
     key = key.replace('.', '_')
     feature_map['file'] = key
     feature_map['label'] = lbl
     feature_map['feature'] = feature_vector
     payload.append(feature_map)
     try:
         db_handle.to_db(payload=payload,
                         key=None,
                         db=MONGO_HOPS_DB,
                         collection=MONGO_XRAY_COLLECTION)
         payload.clear()
         db_handle.close()
     except Exception as e:
         db_handle.close()
         print(img_file_path)
         print("Ignoring Exception : " + str(e))
Example #3
 def __init__(self, numGestures, numFramesPerGesture,
              minDescriptorsPerFrame, numWords, descType, kernel, numIter,
              parent):
     self.numGestures = numGestures
     self.numFramesPerGesture = numFramesPerGesture
     self.numWords = numWords
     self.minDescriptorsPerFrame = minDescriptorsPerFrame
     self.parent = parent
     self.desList = []
     self.voc = None
     self.classifier = None
     self.windowName = "Training preview"
     self.handWindowName = "Cropped hand"
     self.binaryWindowName = "Binary frames"
     self.handTracker = HandTracker(kernelSize=7,
                                    thresholdAngle=0.4,
                                    defectDistFromHull=30,
                                    parent=self)
     self.featureExtractor = FeatureExtractor(type=descType, parent=self)
     self.kernel = kernel
     self.numIter = numIter
     self.numDefects = None
     self.firstFrameList = []
     self.trainLabels = []
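
The desList, voc, and numWords fields suggest a bag-of-visual-words pipeline: descriptors from all training frames are pooled, clustered into numWords centroids, and each frame is histogrammed against that vocabulary. A sketch of those steps using scipy's k-means; the helper names are assumptions, only the field names come from the snippet:

import numpy as np
from scipy.cluster.vq import kmeans, vq

def build_vocabulary(desList, numWords):
    # pool all frame descriptors and cluster them into visual words
    descriptors = np.vstack(desList).astype(float)
    voc, _ = kmeans(descriptors, numWords)
    return voc

def frame_histogram(des, voc):
    # assign each descriptor to its nearest word and count occurrences
    words, _ = vq(des.astype(float), voc)
    hist, _ = np.histogram(words, bins=np.arange(len(voc) + 1))
    return hist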
Example #4
 def get_column_names(self):
     """Get column names."""
     columns = FeatureExtractor(self.config_path).column_names + ['label']
     return columns
Example #5
featureExtractorModulePath = '/cab0/wammar/exp/feat-ext'
if featureExtractorModulePath not in sys.path:
  sys.path.append(featureExtractorModulePath)
from feature_extractor import FeatureExtractor
logResModulePath = '/cab0/wammar/exp/log-reg/source'
if logResModulePath not in sys.path:
  sys.path.append(logResModulePath)
from log_reg import *


#######################
# SUPERVISED LEARNING #
#######################

# initialize the feature extractor
extractor = FeatureExtractor('|')

# For each labeled example in raw/docId-label, write a line in features/labeled.txt.
# Instead of the site ID in the original file, write the non-zero feature IDs and their values.
labeledFeaturesFilename = '{0}.labeled'.format(outputPrefix)
WriteFeaturesFile('{0}/docId-label.txt'.format(rawDir), labeledFeaturesFilename)

# initialize the logistic regression model
logReg = LogisticRegression()

# specify learning info
learningInfo = LearningInfo(
  stoppingCriterion = StoppingCriterion.TRAIN_LOGLIKELIHOOD,
  stoppingCriterionThreshold = 0.00001,
  positiveDevSetSize = 0, 
  negativeDevSetSize = 0,
  minTrainingIterationsCount = 3,
Example #6
def draw_features_from_db(action,
                          db,
                          volt_collection,
                          tag_collection,
                          port=27017,
                          host='localhost',
                          ndevices=3,
                          offset=0,
                          action_num=0):
    client = MongoClient(port=port, host=host)
    database = client[db]
    tag_collection = database[tag_collection]
    volt_collection = database[volt_collection]

    try:
        if volt_collection.count_documents(
            {}) + tag_collection.count_documents({}) < 2:
            raise CollectionError('Collection not found!')
    except CollectionError as e:
        print(e.message)

    ntags = tag_collection.count_documents({'tag': action})

    title = config['volt_collection'][6:] + "" + action + "_features"
    fig = plt.figure(title, figsize=(6, 8))

    # Data is windowed by time, in seconds (e.g. 1s, 10s, 30s, 60s):
    # interval is the span analyzed per step, rate is how often a step runs
    interval = 1
    rate = 1
    fig.suptitle(action + " (" + "interval:" + str(interval) + "s, " +
                 "stepsize:" + str(rate) + "s)")

    # create the feature extractor
    extractor = FeatureExtractor()

    for feature in feature_names:
        # instantiate the feature-extraction module for this feature
        module = eval(feature + "(" + str(interval) + "," + str(rate) + ")")
        # register the module with the extractor
        extractor.register(module)

    # counter for panel position on the figure: increments per tag, i.e. per person
    tag_acc = 1

    # read the data that is of a certain action one by one
    for tag in tag_collection.find({'tag': action}):
        inittime, termtime = tag['inittime'], tag['termtime']

        # get the arrays according to which we will plot later
        times, volts = {}, {}
        for i in range(1, ndevices + 1):
            times[i] = []
            volts[i] = []

        sampling_counter = 0
        sampling_factor = 3  # keep one reading out of every sampling_factor
        for volt in volt_collection.find(
                {'time': {'$gt': inittime, '$lt': termtime}}):
            # downsample: keep every sampling_factor-th reading
            if sampling_counter % sampling_factor == 0:
                device_no = int(volt['device_no'])
                v = volt['voltage']
                time = volt['time']
                times[device_no].append(time)
                volts[device_no].append(v)
            sampling_counter += 1

        # containers for per-device timestamps and feature values
        feature_times, feature_values = {}, {}
        for i in range(1, ndevices + 1):
            feature_times[i] = []
            from collections import defaultdict
            feature_values[i] = defaultdict(list)
            for feature in feature_names:
                feature_values[i][feature[:-6]] = []

        # device index range to extract features from
        start = 1
        end = ndevices

        # extract features for each of the ndevices acquisition devices
        for i in range(start, end + 1):
            for j in range(len(volts[i])):
                value = {"time": times[i][j], "volt": volts[i][j]}
                output = extractor.process(value)
                if (output):
                    features = {
                        "device_no": i,
                        "feature_time": times[i][j],
                        "feature_value": output,
                        "interval": interval,
                        "rate": rate
                    }
                    feature_times[i].append(features['feature_time'])
                    for feature_type in feature_values[i].keys():
                        feature_values[i][feature_type].append(
                            features['feature_value'][feature_type])

            # reset all modules so stale data does not leak across devices
            extractor.clear()

        # number of features
        nfeatures = len(feature_values[1])

        # feature type names; keys() returns a view, which cannot be
        # indexed, so convert it to a list
        feature_type = list(feature_values[1].keys())

        for i in range(start, end + 1):

            # if the matrix files already exist, load them and append
            if (os.path.exists("feature_matrixs/feature_matrix" + str(i) +
                               ".npy")):
                feature_matrix = np.load("feature_matrixs/feature_matrix" +
                                         str(i) + ".npy")
                label_matrix = np.load("feature_matrixs/label_matrix" +
                                       str(i) + ".npy")
                temp_matrix = np.zeros((len(feature_times[i]), nfeatures),
                                       dtype=float)

                os.remove("feature_matrixs/feature_matrix" + str(i) + ".npy")
                os.remove("feature_matrixs/label_matrix" + str(i) + ".npy")

                for j in range(len(feature_times[i])):
                    for k in range(nfeatures):
                        temp_matrix[j][k] = feature_values[i][
                            feature_type[k]][j]
                    label_matrix = np.append(label_matrix, [action_num])

                # np.append(feature_matrixs, [temp_matrix], axis=0)
                feature_matrix = np.insert(feature_matrix,
                                           feature_matrix.shape[0],
                                           values=temp_matrix,
                                           axis=0)

                np.save('feature_matrixs/feature_matrix' + str(i),
                        feature_matrix)
                np.save('feature_matrixs/label_matrix' + str(i), label_matrix)

                print("feature_matrix" + str(i) + ":" +
                      str(feature_matrix.shape))

            # if the files do not exist, create fresh feature and label matrices
            else:
                feature_matrix = np.zeros((len(feature_times[i]), nfeatures),
                                          dtype=float)
                label_matrix = np.zeros((len(feature_times[i]), 1), dtype=int)

                for j in range(len(feature_times[i])):
                    for k in range(nfeatures):
                        feature_matrix[j][k] = feature_values[i][
                            feature_type[k]][j]
                    label_matrix[j] = action_num
                # save with np.save (binary .npy format)
                np.save('feature_matrixs/feature_matrix' + str(i),
                        feature_matrix)
                np.save('feature_matrixs/label_matrix' + str(i), label_matrix)

                print("feature_matrix" + str(i) + ":" +
                      str(feature_matrix.shape))

        tag_acc += 1
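
The register/process/clear calls imply that each registered module buffers {time, volt} samples and emits a value once its window of interval seconds fills, sliding by rate. A minimal sketch of that contract; every class and method body here is an assumption, only the call pattern comes from the snippet:

class MeanModule:
    # toy feature module: emits the window mean every `interval` seconds
    def __init__(self, interval, rate, name='Mean'):
        self.interval, self.rate, self.name = interval, rate, name
        self.buffer = []

    def process(self, value):
        self.buffer.append(value)
        if value['time'] - self.buffer[0]['time'] >= self.interval:
            result = sum(v['volt'] for v in self.buffer) / len(self.buffer)
            # slide the window forward by `rate` seconds
            start = self.buffer[0]['time'] + self.rate
            self.buffer = [v for v in self.buffer if v['time'] >= start]
            return result
        return None

    def clear(self):
        self.buffer = []

class SimpleExtractor:
    def __init__(self):
        self.modules = []

    def register(self, module):
        self.modules.append(module)

    def process(self, value):
        # collect the outputs of all modules that fired on this sample
        out = {m.name: r for m in self.modules
               if (r := m.process(value)) is not None}
        return out or None

    def clear(self):
        for m in self.modules:
            m.clear()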
Example #7
 def __init__(self):
     self.ut = Utility()
     self.fwe = FiveWExtractor()
     self.fex = FeatureExtractor()
     self.nlp = NLPHelper()
     self.tr = ModelTrainer()
Example #8
import numpy as np
import pickle
import math
from data_reader import DataReader
from feature_extractor import FeatureExtractor
from sklearn.naive_bayes import GaussianNB

# reads the image files and converts them into numpy 2D arrays
data_reader = DataReader()
# computes the eigenfaces; follows the fit -> transform paradigm
feature_extractor = FeatureExtractor()
# a naive Bayes classifier assuming each variable follows a Gaussian distribution
clf = GaussianNB()

# since the number of images available is relatively low (400 images),
# we'll use cross-validation to assess the performance of the face recognition system.
data = data_reader.getAllData(shuffle=True)  # shuffle the data so we can do cross-validation
num_folds = 10
fold_length = math.floor(len(data[0]) / num_folds)
average_accuracy = 0.0  # the performance measure of the system

for k in range(num_folds):
    # get train data and test data from data
    train_data, test_data = [None, None], [None, None]
    for i in range(2):
        if k == num_folds - 1:
            # last fold: the test set absorbs any remainder rows
            train_data[i] = data[i][:k * fold_length]
            test_data[i] = data[i][k * fold_length:]
        else:
            # other folds: the k-th slice is the test set, the rest trains
            test_data[i] = data[i][k * fold_length:(k + 1) * fold_length]
            train_data[i] = np.concatenate(
                (data[i][:k * fold_length], data[i][(k + 1) * fold_length:]))
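
The manual slicing above is a standard K-fold split; for comparison, the same loop with scikit-learn's KFold, assuming data is a pair of numpy arrays (images, labels):

from sklearn.model_selection import KFold

kf = KFold(n_splits=num_folds)
for train_idx, test_idx in kf.split(data[0]):
    X_train, X_test = data[0][train_idx], data[0][test_idx]
    y_train, y_test = data[1][train_idx], data[1][test_idx]
    # fit feature_extractor and clf on the training fold, then
    # score on the held-out fold, as in the original loop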
Example #9
    def __init__(self, vocab, options):

        # import here so we don't load Dynet if just running parser.py --help for example
        from multilayer_perceptron import MLP
        from feature_extractor import FeatureExtractor
        import dynet as dy
        global dy

        global LEFT_ARC, RIGHT_ARC, SHIFT, SWAP
        LEFT_ARC, RIGHT_ARC, SHIFT, SWAP = 0, 1, 2, 3

        global NO_COMP, SOFT_COMP, HARD_COMP, GEN_COMP
        NO_COMP, HARD_COMP, SOFT_COMP, GEN_COMP = 0, 1, 2, 3

        self.composition = options.nucleus_composition

        all_rels = vocab[5]
        functional_rels = ['det', 'case', 'clf', 'cop', 'mark', 'aux', 'cc']
        if self.composition in [HARD_COMP, SOFT_COMP]:
            self.compositional_relations = functional_rels
        elif self.composition in [GEN_COMP]:
            self.compositional_relations = all_rels
        else:
            self.compositional_relations = []

        self.compositional_relations_dict = {
            rel: idx
            for idx, rel in enumerate(self.compositional_relations)
        }

        self.model = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)

        self.activations = {
            'tanh': dy.tanh,
            'sigmoid': dy.logistic,
            'relu': dy.rectify,
            'tanh3': lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x))
        }
        self.activation = self.activations[options.activation]

        self.oracle = options.oracle

        self.headFlag = options.headFlag
        self.rlMostFlag = options.rlMostFlag
        self.rlFlag = options.rlFlag
        self.k = options.k

        # dimensions depend on which extended features are enabled
        self.nnvecs = ((1 if self.headFlag else 0) +
                       (2 if self.rlFlag or self.rlMostFlag else 0))
        self.feature_extractor = FeatureExtractor(self.model, options, vocab,
                                                  self.nnvecs)
        self.irels = self.feature_extractor.irels

        if options.no_bilstms > 0:
            mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (
                self.k + 1)
        else:
            mlp_in_dims = self.feature_extractor.lstm_input_size * self.nnvecs * (
                self.k + 1)
        print("The size of the MLP input layer is {0}".format(mlp_in_dims))

        if self.composition in [SOFT_COMP, GEN_COMP]:
            rel_emb_sz = 10
            self.cmp_rel_lookup = self.model.add_lookup_parameters(
                (len(self.compositional_relations), rel_emb_sz))
            cmb_sz = 2 * 2 * options.lstm_output_size + rel_emb_sz
            out_sz = 2 * options.lstm_output_size
            self.combiner_W1 = self.model.add_parameters((out_sz, cmb_sz),
                                                         name='cmbW1')
            self.combiner_b1 = self.model.add_parameters(out_sz, name='cmbb1')

        self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims,
                                 options.mlp_hidden_dims,
                                 options.mlp_hidden2_dims, 4, self.activation)
        self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims,
                               options.mlp_hidden_dims,
                               options.mlp_hidden2_dims,
                               2 * len(self.irels) + 2, self.activation)
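
To make the dimension bookkeeping concrete: with headFlag set and either rlFlag or rlMostFlag set, nnvecs = 1 + 2 = 3. With illustrative values (not from the source) the bilstm branch gives:

# illustrative numbers only
lstm_output_size, nnvecs, k = 125, 3, 3
mlp_in_dims = lstm_output_size * 2 * nnvecs * (k + 1)  # 125 * 2 * 3 * 4 = 3000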
Example #10

if __name__ == '__main__':
    model_file = "model.pkl"
    raw_file = sys.argv[1]
    annotations_file = sys.argv[2]
    if check_file(raw_file):
        print "file is in wrong format expected raw and not proccessed file"

    data = {}
    for sen_id, sen in utils.read_lines(sys.argv[1]):
        data[sen_id] = utils.nlp(sen)

    annotation_sentences = load_annotation_sentences(annotations_file)
    lexicon_helper = Lexicon_helper()
    feature_extractor = FeatureExtractor(lexicon_helper)

    extracted_ents_rules = rules_extractor.predict(data, lexicon_helper)
    extracted_ents_rules = sorted(extracted_ents_rules,
                                  key=utils.get_senid_int)

    sen_entities_with_x = get_x_data(feature_extractor, data)
    filtered_sen_entities_with_x = filter_ents(sen_entities_with_x,
                                               extracted_ents_rules)

    tagged_sen_entites = tag_entities(filtered_sen_entities_with_x,
                                      annotation_sentences)
    clf = LinearSVC(random_state=0, tol=1e-5)
    allx = np.array([x[3].toarray()[0] for x in tagged_sen_entites])
    yall = np.array([y[4] for y in tagged_sen_entites])
    clf.fit(allx, yall)
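
The snippet ends right after fitting; a hedged follow-on showing how the fitted LinearSVC could be applied to the same feature vectors (scoring on training data is only a sanity check, not an evaluation):

from sklearn.metrics import accuracy_score

preds = clf.predict(allx)
print("train accuracy: {:.3f}".format(accuracy_score(yall, preds)))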
Example #11
def feature_to_matrix_file(action,
                           db,
                           volt_collection,
                           tag_collection,
                           port=27017,
                           host='localhost',
                           ndevices=3,
                           offset=0,
                           action_num=0,
                           interval=2,
                           rate=1):
    # Data is windowed by time, in seconds (e.g. 1s, 10s, 30s, 60s):
    # interval is the span analyzed per step, rate is how often a step runs
    # print(interval, rate)
    # Different actions can use different window sizes, e.g.:
    # if action == "turn_over":
    #     interval = 2
    #     rate = 1
    client = MongoClient(port=port, host=host)
    database = client[db]
    tag_collection = database[tag_collection]
    volt_collection = database[volt_collection]

    try:
        if volt_collection.count_documents(
            {}) + tag_collection.count_documents({}) < 2:
            raise CollectionError('Collection not found!')
    except CollectionError as e:
        print(e.message)

    ntags = tag_collection.count_documents({'tag': action})
    # device index range to extract features from
    start = 1
    end = ndevices

    # create the feature extractor
    extractor = FeatureExtractor()

    for feature in feature_names:
        # instantiate the feature-extraction module for this feature
        module = eval(feature + "(" + str(interval) + "," + str(rate) + ")")
        # register the module with the extractor
        extractor.register(module)

    # per-tag counter, i.e. the number of people processed so far
    tag_acc = 0

    # read the data that is of a certain action one by one
    for tag in tag_collection.find({'tag': action}):
        tag_acc += 1
        if (tag_acc > 8):
            break
        print("people_" + str(tag_acc))
        inittime, termtime = tag['inittime'], tag['termtime']

        # get the arrays according to which we will plot later
        times, volts, filter_volts, normalize_volts = {}, {}, {}, {}
        for i in range(start, ndevices + 1):
            times[i] = []
            volts[i] = []
            filter_volts[i] = []
            normalize_volts[i] = []

        for volt in volt_collection.find(
            {'time': {
                '$gt': inittime,
                '$lt': termtime
            }}):
            device_no = int(volt['device_no'])
            v = volt['voltage']
            t = volt['time']
            times[device_no].append(t)
            volts[device_no].append(v)

        filter_thread = [0.2, 0.06, 0.08]
        for i in range(start, end + 1):
            filter_volts[i] = volts[i]
            # wavelet (CWT) filtering
            filter_volts[i] = cwt_filter(volts[i], 0.08)

            # Fourier-transform filtering
            # filter_volts[i] = fft_filter(filter_volts[i], 1 / 70, 15)

            # low-pass filtering; 8 is the filter order
            # b, a = signal.butter(8, 3 / 7, 'lowpass')
            # filter_volts[i] = signal.filtfilt(b, a, filter_volts[i])

            # moving-average filtering; mode can be: full, valid, same
            # filter_volts[i] = np_move_avg(filter_volts[i], 5, mode="same")

            # normalize the data
            normalize_volts[i] = getNormalization(filter_volts[i])

        # containers for per-device timestamps and feature values
        feature_times, feature_values = {}, {}
        for i in range(start, end + 1):
            feature_times[i] = []
            from collections import defaultdict
            feature_values[i] = defaultdict(list)
            for feature in feature_names:
                feature_values[i][feature[:-6]] = []

        # extract features for each acquisition device
        for i in range(start, end + 1):
            for j in range(len(normalize_volts[i])):
                value = {"time": times[i][j], "volt": normalize_volts[i][j]}
                output = extractor.process(value)
                if (output):
                    features = {
                        "device_no": i,
                        "feature_time": times[i][j],
                        "feature_value": output,
                        "interval": interval,
                        "rate": rate
                    }
                    feature_times[i].append(features['feature_time'])
                    for feature_type in feature_values[i].keys():
                        feature_values[i][feature_type].append(
                            features['feature_value'][feature_type])

            # reset all modules so stale data does not leak across devices
            extractor.clear()
        extractor.clear()

        # number of features
        nfeatures = len(feature_values[1])

        # feature type names; keys() returns a view, which cannot be
        # indexed, so convert it to a list
        feature_type = list(feature_values[1].keys())

        for i in range(start, end + 1):

            # if the matrix files already exist, load them and append
            if (os.path.exists("feature_matrixs/feature_matrix" + str(i) +
                               ".npy")):
                feature_matrix = np.load("feature_matrixs/feature_matrix" +
                                         str(i) + ".npy")
                label_matrix = np.load("feature_matrixs/label_matrix" +
                                       str(i) + ".npy")
                temp_matrix = np.zeros((len(feature_times[i]), nfeatures),
                                       dtype=float)

                os.remove("feature_matrixs/feature_matrix" + str(i) + ".npy")
                os.remove("feature_matrixs/label_matrix" + str(i) + ".npy")

                for j in range(len(feature_times[i])):
                    for k in range(nfeatures):
                        temp_matrix[j][k] = feature_values[i][
                            feature_type[k]][j]
                    label_matrix = np.append(label_matrix, [action_num])

                # np.append(feature_matrixs, [temp_matrix], axis=0)
                feature_matrix = np.insert(feature_matrix,
                                           feature_matrix.shape[0],
                                           values=temp_matrix,
                                           axis=0)

                np.save('feature_matrixs/feature_matrix' + str(i),
                        feature_matrix)
                np.save('feature_matrixs/label_matrix' + str(i), label_matrix)
                np.set_printoptions(suppress=True)
                # index with the loop variable i, not the stale device_no
                np.savetxt('feature_matrixs/feature_matrix' + str(i) + '.txt',
                           feature_matrix,
                           fmt="%.18f,%.18f")

                print("feature_matrix" + str(i) + ":" +
                      str(feature_matrix.shape))

            # if the files do not exist, create fresh feature and label matrices
            else:
                feature_matrix = np.zeros((len(feature_times[i]), nfeatures),
                                          dtype=float)
                label_matrix = np.zeros((len(feature_times[i]), 1), dtype=int)

                for j in range(len(feature_times[i])):
                    for k in range(nfeatures):
                        feature_matrix[j][k] = feature_values[i][
                            feature_type[k]][j]
                    label_matrix[j] = action_num
                # save with np.save (binary .npy format)
                np.save('feature_matrixs/feature_matrix' + str(i),
                        feature_matrix)
                np.save('feature_matrixs/label_matrix' + str(i), label_matrix)

                np.set_printoptions(suppress=True)
                # index with the loop variable i, not the stale device_no
                np.savetxt('feature_matrixs/feature_matrix' + str(i) + '.txt',
                           feature_matrix,
                           fmt="%.18f,%.18f")

                print("feature_matrix" + str(i) + ":" +
                      str(feature_matrix.shape))
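
To make the on-disk format concrete, a short sketch of reading the saved matrices back for training; the paths follow the snippet, the stacking is illustrative:

import numpy as np

# one row per analysis window, one column per registered feature;
# the label matrix holds the action_num written for each window
feature_matrix = np.load('feature_matrixs/feature_matrix1.npy')
label_matrix = np.load('feature_matrixs/label_matrix1.npy')
X, y = feature_matrix, label_matrix.ravel()
print(X.shape, y.shape)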
Example #12
print(
    'Loaded ' + str(len(train_images_filenames)) +
    ' training images filenames with classes ', set(train_labels))
print(
    'Loaded ' + str(len(test_images_filenames)) +
    ' testing images filenames with classes ', set(test_labels))

# Load precomputed labels if available
precomp_label_filename = classifier + '_' + feature_method + '.npy'
if os.path.isfile(precomp_label_filename) and not force_reload:
    print('Loading previous predictions')
    predicted_classes = np.load(precomp_label_filename)
else:
    start = time.time()

    print('Extracting features')
    fe = FeatureExtractor(feature_method)
    (X, y) = fe.extract_features(train_images_filenames,
                                 train_labels,
                                 nimmax=30)

    print('Training a classifier')
    c = Classifier(classifier)
    c.fit(X, y)

    print('Predicting test set labels with the classifier')
    numtestimages = 0
    predicted_classes = []
    for i in range(len(test_images_filenames)):
        imfilename = test_images_filenames[i]
        des = fe.extract_single_image_features(imfilename)
        predictedclass = c.predict(des)