示例#1
0
def generate_captions(filename, top_n=5):
    """Generate the ``top_n`` best-scoring captions for an image by beam search.

    Image features come from ``get_features(filename)`` and next-word
    probabilities from ``predict_probs``. Every hypothesis starts with the
    ``'<START/STOP>'`` word index and counts as finished once it ends with
    that index again. While searching, a hypothesis is scored by the mean
    negative log-probability of its generated words (lower is better). The
    beam keeps 20 hypotheses at first and narrows by one per step down to 5.

    Returns ``zip(*pool)`` over the final ``[caption, score]`` pairs, i.e.
    the caption strings followed by their scores, ordered by ascending
    score. The returned score is the summed (not mean) negative
    log-probability.
    """
    def score_of(candidate):
        return candidate[1]

    cnn_features = get_features(filename)
    stop_idx = word_to_idx['<START/STOP>']

    beam_size = 20
    pool = [[[stop_idx], 0]]
    first_pass = True
    # n.b. this is suboptimal: a lot of recalculation occurs
    while first_pass or any(item[0][-1] != stop_idx for item in pool):
        candidates = []
        for entry in pool:
            tokens, mean_nll = entry
            # Finished hypotheses are carried over untouched (the seed also
            # ends with the stop index, hence the first-pass exemption).
            if not first_pass and tokens[-1] == stop_idx:
                candidates.append(entry)
                continue
            probs = predict_probs(tokens, cnn_features).flatten()
            k = len(tokens) - 1  # words generated so far
            for word_idx in probs.argsort()[-beam_size:]:
                # Running mean of -log(p) extended by one more word.
                updated = (mean_nll * k - np.log(probs[word_idx])) / (k + 1)
                candidates.append([tokens + [word_idx], updated])
        candidates.sort(key=score_of)
        pool = candidates[:beam_size]
        first_pass = False
        if beam_size != 5:
            beam_size -= 1

    best = sorted(pool, key=score_of)[:top_n]
    for entry in best:
        tokens = entry[0]
        # Turn the per-word mean back into a total over the caption.
        entry[1] *= len(tokens) - 1
        # Drop the leading and trailing start/stop markers.
        entry[0] = u' '.join([idx_to_word[word_idx] for word_idx in tokens[1:-1]])
    best.sort(key=score_of)
    return zip(*best)
示例#2
0
def generate_captions(filename, top_n=5):
    """Caption an image with a shrinking-beam search and return the best results.

    The search is seeded with a single hypothesis holding only the
    ``'<START/STOP>'`` index. Each step extends every unfinished hypothesis
    with its ``beam_size`` most probable next words (from ``predict_probs``)
    and keeps the ``beam_size`` hypotheses with the lowest mean negative
    log-probability. ``beam_size`` starts at 20 and drops by one per step
    until it is 5. The loop ends when every kept hypothesis ends with the
    stop index.

    Returns ``zip(*pool)`` for the ``top_n`` kept ``[caption, score]``
    pairs: captions are space-joined words without the start/stop markers,
    scores are total negative log-probabilities in ascending order.
    """
    cnn_features = get_features(filename)
    stop_idx = word_to_idx['<START/STOP>']

    beam_size = 20
    pool = [[[stop_idx], 0]]
    step = 0
    # n.b. this is suboptimal: a lot of recalculation occurs
    while step == 0 or any(hyp[0][-1] != stop_idx for hyp in pool):
        expanded = []
        for hyp in pool:
            words = hyp[0]
            if step > 0 and words[-1] == stop_idx:
                # Already terminated: keep as is.
                expanded.append(hyp)
                continue
            probs = predict_probs(words, cnn_features).flatten()
            k = len(words) - 1
            top_words = probs.argsort()[-beam_size:]
            for word_idx in top_words:
                neg_log_p = -np.log(probs[word_idx])
                expanded.append(
                    [words + [word_idx], (hyp[1] * k + neg_log_p) / (k + 1)])
        pool = sorted(expanded, key=lambda h: h[1])[:beam_size]
        step += 1
        # Narrow the beam by one per step, never below 5.
        beam_size = max(beam_size - 1, 5)

    pool = sorted(pool, key=lambda h: h[1])[:top_n]
    for hyp in pool:
        n_words = len(hyp[0]) - 1
        hyp[1] *= n_words
        hyp[0] = u' '.join(idx_to_word[word_idx] for word_idx in hyp[0][1:-1])
    pool.sort(key=lambda h: h[1])
    return zip(*pool)
示例#3
0
def extract_features(image_dict, seg_dict):
    """Compute one feature entry per image in ``image_dict``.

    Args:
        image_dict: mapping from image name to the image value handed to
            the extractor.
        seg_dict: mapping from image name to the third extractor argument
            (presumably segmentation data -- confirm against
            ``feature_extractor.get_features``). Must contain every key of
            ``image_dict``.

    Returns:
        dict mapping each image name to the value returned by
        ``feature_extractor.get_features(image, image_name, seg)``.

    Raises:
        KeyError: if an image name is missing from ``seg_dict``.
    """
    features = {}
    for image_name, image in image_dict.items():
        # Progress output. The parenthesised single-argument form prints the
        # same text on Python 2 and Python 3 (the bare statement form is a
        # SyntaxError on Python 3).
        print(image_name)
        features[image_name] = feature_extractor.get_features(
            image, image_name, seg_dict[image_name])

    return features
	def predictNeural(self, image):
		"""Run the neural network on one BGR image and return its prediction row.

		The image is converted to grayscale, its features are extracted with
		``fe.get_features`` and stacked with a fixed constant row into a
		two-row input. ``self.nnet.predict`` fills a ``(rows, 1)`` output
		buffer, and the row belonging to the image (row 0) is returned.
		"""
		gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
		sample_row = fe.get_features(gray)

		# Constant second input row.
		# NOTE(review): its purpose is not visible here -- presumably the
		# network is given more than one row on purpose; confirm.
		constant_row = np.array([
			0., 10., 22., 26., 43., 37.,
			30., 27., 36., 48., 59., 70.,
			80., 82., 80., 75., 66., 57.,
			49., 123., 213., 123., 29., 25.,
			25., 24., 25., 30., 38., 57.,
			70., 92., 115., 122., 174., 199.,
			224., 255., 235., 116., 43., 26.,
			20., 18., 15., 1., 14., 12.,
			12., 10., 11., 13., 17., 18.,
			11., 10., 124., 18., 13., 13.,
			37., 17., 5., 1., 8.80175781, 2.59635413, 1.29720053])

		batch = np.array([sample_row, constant_row])
		# predict() writes into the buffer passed as its second argument.
		outputs = np.empty((len(batch), 1), 'float')
		self.nnet.predict(batch, outputs)
		return outputs[0]
	def test(self, sample_img, numNeigh=11):
		"""Classify one BGR image with the k-nearest-neighbour model.

		``numNeigh`` is the neighbour count passed to
		``self.knn.find_nearest``. The first element of that call's result
		is returned.
		"""
		gray = cv2.cvtColor(sample_img, cv2.COLOR_BGR2GRAY)
		features = fe.get_features(gray)
		# The model is queried with a single-row float32 matrix.
		query = np.array(features, np.float32).reshape((1, len(features)))
		result = self.knn.find_nearest(query, numNeigh)
		return result[0]
	def predictSVM(self, im):
		"""Classify one BGR image with the SVM model.

		The image is converted to grayscale, its features are extracted with
		``fe.get_features`` and wrapped in a one-sample array for
		``self.svm.predict_all``. The full result is printed and its first
		element is returned.
		"""
		gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
		inputs = np.array([fe.get_features(gray)])

		result = self.svm.predict_all(inputs)

		# Parenthesised form prints the same on Python 2 and 3; the bare
		# print statement is a SyntaxError on Python 3.
		print(result)
		return result[0]
示例#7
0
def lineparser(line):
    """Inspect one tab-separated log line and submit downloaded PE files.

    Lines that start with ``#`` are ignored, as are lines with fewer than
    eight fields. When field 7 contains ``"PE"``, the file named by field 22
    under ``extract_files/`` is opened with ``pefile``. If it is an
    executable, the download is reported (fields 2 and 3) and the file's
    features are POSTed to ``http://localhost:8080/ML``; the JSON reply is
    printed.

    Any failure while opening or submitting the file is reported as
    "Unable to open the file" and otherwise ignored (best effort).

    Args:
        line: one raw log line with tab-separated fields.
    """
    # startswith() is safe on an empty line and compares by value; the
    # previous `line[0] is not "#"` relied on string identity.
    if line.startswith("#"):
        return
    line_fields = line.split("\t")
    if len(line_fields) < 8 or "PE" not in line_fields[7]:
        return
    try:
        # Built inside the try so a missing field 22 is reported like any
        # other unreadable file.
        path = "extract_files/" + line_fields[22]
        if pefile.PE(path, fast_load=True).is_exe():  # just another way to validate
            print("EXE FILE DOWNLOADED FROM %s BY %s" % (line_fields[2], line_fields[3]))
            print(requests.post('http://localhost:8080/ML',
                                json=feature_extractor.get_features(path)).json())
    except Exception:
        # Deliberately broad (best effort), but no longer swallows
        # KeyboardInterrupt / SystemExit as a bare `except:` did.
        print("Unable to open the file")
示例#8
0
    def run(self):
        """Send the target file's features to the local ML service.

        Sets ``self.key`` to ``"mlmd"``. If ``self.file_path`` exists, its
        features are extracted with ``feature_extractor.get_features`` and
        POSTed as JSON to ``http://localhost:8080/ML``; the decoded JSON
        reply is returned. If the file does not exist, an empty dict is
        returned.

        Raises:
            CuckooProcessingError: when ``SomethingFailed`` is raised while
                extracting or submitting.
        """
        self.key = "mlmd"
        # Default for the "file missing" case. Without it the final return
        # raised UnboundLocalError.
        data = {}

        try:
            if os.path.exists(self.file_path):
                # Extract PE data, send it to the MLMD server, use the reply.
                features = feature_extractor.get_features(self.file_path)
                res = requests.post("http://localhost:8080/ML", json=features)
                data = res.json()
        except SomethingFailed:
            # NOTE(review): SomethingFailed is not defined in the visible
            # code and looks like a template placeholder -- confirm which
            # exceptions (e.g. requests errors) should be translated here.
            raise CuckooProcessingError("Failed")

        return data
def main(argv):
    """Print the top-5 zero-shot class predictions for one image.

    Args:
        argv: command-line arguments without the program name; must hold
            exactly one element, the input image path. With any other
            length a usage line is printed and the process exits.

    The image is resized to 224x224, passed through the model returned by
    ``get_model()`` via ``get_features``, L2-normalised and fed to the Keras
    model loaded from ``MODELPATH``. The prediction is matched against the
    class vectors loaded from ``WORD2VECPATH`` with a KD-tree, and the five
    nearest class names are printed.
    """
    # Local import: sys.exit is the supported way to exit from a program;
    # the bare exit() helper is injected by the site module for interactive
    # use and is not guaranteed to exist.
    import sys

    if len(argv) != 1:
        print("Usage: python3 detect_object.py input-image-path")
        sys.exit()

    # READ IMAGE
    IMAGEPATH = argv[0]
    img = Image.open(IMAGEPATH).resize((224, 224))

    # LOAD PRETRAINED VGG16 MODEL FOR FEATURE EXTRACTION
    vgg_model = get_model()
    # EXTRACT IMAGE FEATURE
    img_feature = get_features(vgg_model, img)
    # L2 NORMALIZE FEATURE
    img_feature = normalize(img_feature, norm='l2')

    # LOAD ZERO-SHOT MODEL
    model = load_keras_model(model_path=MODELPATH)
    # MAKE PREDICTION
    pred = model.predict(img_feature)

    # LOAD CLASS WORD2VECS (sorted by class name, the first item of each pair)
    class_vectors = sorted(np.load(WORD2VECPATH, allow_pickle=True),
                           key=lambda x: x[0])
    classnames, vectors = zip(*class_vectors)
    classnames = list(classnames)
    # The np.float alias was removed in NumPy 1.24; the builtin float is the
    # same dtype (float64).
    vectors = np.asarray(vectors, dtype=float)

    # PLACE WORD2VECS IN KDTREE
    tree = KDTree(vectors)
    # FIND CLOSEST WORD2VEC and GET PREDICTION RESULT
    dist, index = tree.query(pred, k=5)
    pred_labels = [classnames[idx] for idx in index[0]]

    # PRINT RESULT
    print()
    print("--- Top-5 Prediction ---")
    for i, classname in enumerate(pred_labels):
        print("%d- %s" % (i + 1, classname))
    print()
示例#10
0
def get_similarity_list(source_id):
    """Return similarity records for the images ranked closest to a source image.

    The source image is read from ``test/generated/<source_id>.jpg``, its
    features are computed with ``intermediate_model`` and compared against
    ``feat_matrix``. The ``num_similar`` entries with the largest values in
    the resulting matrix are selected. Each selected index not yet in the
    module-level ``unique_images`` list is appended to it.

    Returns a list of dicts with keys ``"id"``, ``"score"`` (rounded to
    4 decimals), ``"layer"`` and ``"sourceid"``, in descending score order.
    """
    global unique_images

    image_path = "test/generated/" + str(source_id) + ".jpg"
    source_image_data = feat_ex.get_np_array_from_image(image_path)
    # Features of the source image.
    feat = feat_ex.get_features(intermediate_model, source_image_data)

    # Similarity of the source against the entire dataset.
    cosine_dist_matrix = feat_ex.compute_cosine_distance_matrix(
        feat, feat_matrix)
    descending = cosine_dist_matrix.flatten().argsort()[::-1]
    similarity_score_index = descending[:num_similar]
    similarity_scores = cosine_dist_matrix[similarity_score_index].flatten()

    similar_images = np_original[similarity_score_index]

    similarity_details = []
    for image_idx, score, _ in zip(similarity_score_index, similarity_scores,
                                   similar_images):
        # Keep track of images we have not seen before.
        if image_idx not in unique_images:
            unique_images.append(image_idx)

        similarity_details.append({
            "id": int(image_idx),
            "score": (round(score, 4)),
            "layer": layer_name,
            "sourceid": source_id
        })

    print(source_id, " > processing similarity details using VGG layer ",
          layer_name)
    return similarity_details
示例#11
0
def apply_pca():
    """Reduce every dataset image's features with PCA and store them in HDF5.

    Features of each file matching ``<dataset>/*.*`` are computed with
    ``get_features`` and stacked into one matrix (one row per image,
    100352 columns). The matrix is projected onto 100 principal components
    and each reduced row is written to ``index_file`` as its own dataset.
    The matrix shapes before and after the reduction are printed.
    """
    dim = 100352
    reduced_dim = 100

    # List the dataset once so the extraction pass and the write pass see the
    # same files in the same order.
    image_paths = glob.glob(dataset + os.path.sep + "*.*")

    # Size the matrix from the real file count. A hard-coded row count
    # overflows with more images and leaves all-zero rows (which distort the
    # PCA fit) with fewer.
    matrix = np.zeros((len(image_paths), dim))
    for row, imagePath in enumerate(image_paths):
        matrix[row] = get_features(imagePath)

    print(matrix.shape)
    pca = PCA(n_components=reduced_dim)
    principal_comp = pca.fit_transform(matrix)
    print(principal_comp.shape)

    # Open the index file once for all writes instead of once per image.
    with h5py.File(index_file, 'a') as h:
        for imagePath, reduced in zip(image_paths, principal_comp):
            # Dataset key: everything after the last 'h' in the path.
            # NOTE(review): presumably this relies on the dataset directory
            # name ending in 'h' and file names containing none -- confirm.
            k = imagePath[imagePath.rfind('h') + 1:]
            h.create_dataset(k, data=reduced)
    test_ids = shuffled_ids[int(all_img_num * test_ratio) *
                            testset:int(all_img_num * test_ratio) *
                            (testset + 1)]
    train_img_num = len(train_ids)
    print ">> train-test split (%d, %d)" % (len(train_ids), len(test_ids))

    ##################################################
    #	EXTRACT FEATURES
    ##################################################
    fdir = hp.cur_dir + "tmp/features_colbow.csv"
    if os.path.isfile(fdir):
        print ">> use existing features"
        features = np.genfromtxt(fdir, delimiter=',')
    else:
        print ">> extract features"
        features = fe.get_features(images,
                                   hp.FeatureType.COL_BOW)  # imgNum*featureLen
        np.savetxt(fdir, features, delimiter=',')
        print ">> features extracted"

    ##################################################
    #	FIT A CLASSIFIER USING RANDOM DATA
    ##################################################
    all_data = train_ids
    sampled_num = int(train_img_num * hp.sampling_ratio)
    sampled_ids = all_data[:sampled_num]
    lbs = [labels[x] for x in sampled_ids]
    fts = [features[x] for x in sampled_ids]
    classifier = cf.get_classifier(fts, lbs, hp.ClassifierType.RANDOM_FOREST)
    print ">> fit classifier with %d labeled samples" % sampled_num

    train_features = [features[x] for x in train_ids]
示例#13
0
def get_data(use_precomputed=False):
    """Load the feature data set, from a pickle or by recomputing it.

    Args:
        use_precomputed: when true and ``all_data.pkl`` exists, return
            ``load_obj("all_data.pkl")`` directly. When the file is missing
            (or the flag is false) everything is recomputed.

    Recomputation reads ``data_chunk`` from the .mat file of every
    (subject, label) pair in the module-level ``subjects`` / ``labels``,
    normalises the raw windows, saves them, extracts features per trial with
    ``get_features`` and saves the resulting blob.

    Returns:
        Tuple ``(X_all, y_all, groups, feature_names, subjects, labels,
        class_names, is_moving_data, include_eog, include_imu)``, or
        whatever ``load_obj`` returns on the precomputed path.

    Raises:
        ValueError: if no trial was found in any input file.
    """
    if use_precomputed:
        filename = "all_data.pkl"
        if isfile(filename):
            print("loading pickled data")
            return load_obj(filename)
        print("couldn't load pickle file. recomputing features")
        # Fall through to the recomputation below.

    raw_chunks = []  # one [trial * window frames * sensor channels] array per file
    raw_index = []   # (subject number, label number, trial index) per trial
    y_all = []
    groups = []

    # Accumulate the data for all the subjects.
    print("reading raw data into memory")
    for subject in subjects:
        for label in labels:
            path = get_path(subject, label, is_moving_data)

            # [ trial * window frames * sensor channels ]
            subject_matrix = scipy.io.loadmat(path)['data_chunk']
            n_trials = subject_matrix.shape[0]

            groups += [subject] * n_trials
            y_all += [label] * n_trials
            raw_index += [(subject, label, trial) for trial in range(n_trials)]
            if n_trials:
                raw_chunks.append(subject_matrix[:, :, :])

    if not raw_chunks:
        raise ValueError("no trials found in the input data")

    # Join all windows in a single concatenate call; growing the array one
    # trial at a time re-copied the whole array for every trial. The empty
    # float seed keeps the original constraints: float result, 10 channels,
    # and the frame count of the first trial.
    seed = np.empty(shape=(0, raw_chunks[0].shape[1], 10), dtype=float)
    X_all_raw = np.concatenate([seed] + raw_chunks, axis=0)

    print("normalizing data")
    # Normalize accelerometer signals (channels 0-2): subtract the overall
    # mean, divide by the mean of the per-sample std across the 3 channels.
    a = np.mean(np.std(X_all_raw[:, :, 0:3], axis=2))
    b = np.mean(np.mean(X_all_raw[:, :, 0:3], axis=2))
    X_all_raw[:, :, 0:3] = (X_all_raw[:, :, 0:3] - b) / a

    # Normalize gyroscope signals (channels 3-5) the same way.
    a = np.mean(np.std(X_all_raw[:, :, 3:6], axis=2))
    b = np.mean(np.mean(X_all_raw[:, :, 3:6], axis=2))
    X_all_raw[:, :, 3:6] = (X_all_raw[:, :, 3:6] - b) / a

    # EOG signals (channels 6-9) are only centred: subtract each channel's
    # mean over all frames and trials.
    mean_eog_signals = np.mean(np.mean(X_all_raw[:, :, 6:10], axis=1),
                               axis=0)
    X_all_raw[:, :, 6:10] = X_all_raw[:, :, 6:10] - mean_eog_signals

    print("saving raw data")
    raw_index = np.array(raw_index)
    save_obj((X_all_raw, raw_index),
             "../../res/all_data_raw.pkl",
             sanitized=False)

    print("extracting features")
    feature_rows = []
    for trial in tqdm(range(X_all_raw.shape[0])):
        feature_extracted_window, feature_names = get_features(
            X_all_raw[trial, :, :], include_eog, include_imu)
        feature_rows.append(np.array(feature_extracted_window)[np.newaxis, :])

    # Same single-concatenate approach as for the raw windows.
    seed = np.empty(shape=(0, feature_rows[0].shape[1]), dtype=float)
    X_all = np.concatenate([seed] + feature_rows, axis=0)

    y_all = np.array(y_all)
    groups = np.array(groups)

    data_blob = (X_all, y_all, groups, feature_names, subjects, labels,
                 class_names, is_moving_data, include_eog, include_imu)

    print("pickling data")
    # NOTE(review): the argument order here (name, object) is the reverse of
    # the save_obj call for the raw data above (object, name) -- one of the
    # two is presumably wrong; confirm against save_obj's signature.
    save_obj("all_data.pkl", data_blob)

    return data_blob
示例#14
0
from feature_extractor import get_features
from xg_model import xgb_score
from xg_model import xgb_model
from xg_model import save_result

# Script entry point: build the features, score the model, then save and
# preview the predictions.
if __name__ == '__main__':
    # feature_types is returned by get_features but is not used below.
    train, test, feature_types = get_features(regenerate=False)
    print(train[0].head())
    # Get the model score.
    # NOTE(review): the original note says the default cv is 5 -- confirm
    # against xg_model.xgb_score.
    xgb_score(train)

    # Get the model prediction (currently disabled).
    #predict = xgb_model(train[0], train[1], test[0])

    # Save the model prediction to prediction/.
    # The return value is the CSV content as a DataFrame.
    # NOTE(review): the original note says the default CSV name is
    # test_result -- confirm against xg_model.save_result.
    result = save_result(train, test)
    print(result.head())
	def test(self, sample_img, k=10):
		"""Classify one BGR image with the k-nearest-neighbour model.

		Args:
			sample_img: BGR image; it is converted to grayscale before
				feature extraction with ``fe.get_features``.
			k: number of neighbours passed to ``self.knn.find_nearest``.
				Defaults to 10, the value that used to be hard-coded.

		Returns:
			The first element of the ``find_nearest`` result.
		"""
		gray = cv2.cvtColor(sample_img, cv2.COLOR_BGR2GRAY)
		sample = fe.get_features(gray)
		# The model is queried with a single-row float32 matrix.
		sample = np.array(sample, np.float32).reshape((1, len(sample)))
		return self.knn.find_nearest(sample, k)[0]
def lbp_pipeline(gray_image, **kwargs):
    """Preprocess ``gray_image`` and return the features of the result.

    The values returned by ``image_preprocessing`` are unpacked as the
    positional arguments of ``get_features``. Extra keyword arguments are
    accepted and ignored.
    """
    preprocessed = image_preprocessing(gray_image)
    return get_features(*preprocessed)
示例#17
0
    dim = 100352
    i = 0
    matrix = np.zeros((dataset_size, dim))
    for imagePath in glob.glob(dataset + os.path.sep + "*.*"):
        # extract our unique image ID (i.e. the filename)
        features = get_features(imagePath)
        matrix[i] = features
        i += 1

    print(matrix.shape)
    reduced_dim = 100
    pca = PCA(n_components=reduced_dim)
    principal_comp = pca.fit_transform(matrix)
    print(principal_comp.shape)
    # print()
    i = 0
    for imagePath in glob.glob(dataset + os.path.sep + "*.*"):
        with h5py.File(index_file, 'a') as h:
            k = imagePath[imagePath.rfind('h') + 1:]
            h.create_dataset(k, data=principal_comp[i])
            i += 1


# apply_pca()

# Index the raw (unreduced) features of every dataset image in the HDF5 file.
# The file is opened once for the whole run instead of once per image.
image_paths = glob.glob(dataset + os.path.sep + "*.*")
with h5py.File(index_file, 'a') as h:
    for imagePath in image_paths:
        # Dataset key: everything after the last 'h' in the path.
        # NOTE(review): presumably this relies on the dataset directory name
        # ending in 'h' and file names containing none -- confirm.
        k = imagePath[imagePath.rfind('h') + 1:]
        features = get_features(imagePath)
        h.create_dataset(k, data=features)