def processFrame(im):
    """Track the template rectangle in one frame and return the annotated frame.

    The frame is mirrored horizontally.  When a template rectangle is set
    (``TPL_RECT`` is not ``None``) the correlation filter is either
    initialised from the current patch or applied to it; the position of the
    response peak gives the displacement by which the rectangle is moved
    (clamped to the image), and the filter is then updated from the patch at
    the new location.  Finally the rectangle is drawn on the frame.

    Module-level state (``TPL_RECT``, ``FILTER_INIT``, ``FILTER_NUM``,
    ``FILTER_DEN``, ``COS_WINDOW``, ``PSR_THRES``) is only read or mutated in
    place here, so no ``global`` declarations are required.

    :param im: single-channel image; ``im.shape`` is unpacked as
        ``(height, width)``.
    :return: the mirrored frame, with the rectangle drawn on it when a
        template is set.
    """
    # NOTE(review): the return value is discarded, so this call only matters
    # if utils.preprocessing modifies ``im`` in place -- confirm.
    utils.preprocessing(im)
    # Mirror the frame so it is seen the right way round.
    im = cv2.flip(im, 1)
    im_height, im_width = im.shape

    # ``is None`` rather than ``!= None``: comparing a NumPy array with
    # ``!=`` is elementwise and its truth value is ambiguous.
    if TPL_RECT is None:
        return im

    psr_test = False
    tplc = TPL_RECT.astype(np.int64)
    patch = im[tplc[0, 0]:tplc[1, 0], tplc[0, 1]:tplc[1, 1]]
    if not FILTER_INIT:
        initFilter(patch)
        print(tplc.shape)
    elif patch.shape == COS_WINDOW.shape:
        height, width = patch.shape
        conj_filter = np.conj(FILTER_NUM / FILTER_DEN)
        G = conj_filter * np.conj(np.fft.fft2(patch * COS_WINDOW))
        g = np.real(np.fft.ifft2(G))
        utils.showImage("output", g)
        utils.showImage(
            "filter", np.real(np.fft.fftshift(np.fft.ifft2(conj_filter))))
        psr = utils.computePSR(g)
        # The PSR only selects how the rectangle is drawn below; the
        # rectangle is moved and the filter updated on every frame.
        psr_test = psr > PSR_THRES

        # Offset of the response peak from the patch centre.
        peak_pos = np.argmax(g)
        dy = peak_pos // width - height // 2
        dx = peak_pos % width - width // 2
        # Clamp the displacement so the moved rectangle stays in the image.
        if tplc[0, 0] - dy < 0:
            dy = tplc[0, 0]
        if tplc[1, 0] - dy >= im_height:
            dy = tplc[1, 0] - im_height
        if tplc[0, 1] - dx < 0:
            dx = tplc[0, 1]
        if tplc[1, 1] - dx >= im_width:
            dx = tplc[1, 1] - im_width
        tplc[:, 0] -= dy
        tplc[:, 1] -= dx
        # Keep the shared rectangle in step with the integer working copy.
        TPL_RECT[:, 0] -= dy
        TPL_RECT[:, 1] -= dx
        new_patch = im[tplc[0, 0]:tplc[1, 0], tplc[0, 1]:tplc[1, 1]]
        updateFilter(new_patch)

    utils.drawRectangle(im, tplc, not psr_test)
    return im
Пример #2
0
def main():
    """Create the Flask application, register its routes and start serving.

    ``data.csv`` is loaded once through ``preprocessing`` and shared with the
    route handlers through the enclosing scope.

    NOTE(review): ``abspath`` is not assigned in this function (the
    assignment below is commented out), so it has to be defined at module
    scope for the path lookup to work -- confirm.
    """
    app = Flask(__name__)

    csv_name = "data.csv"
    #abspath = os.path.abspath(os.path.dirname(__file__))
    csv_location = os.path.join(abspath, csv_name)
    graph_dir = 'graph'
    data = preprocessing(csv_location)

    @app.route("/")
    def index():
        """Serve the landing page."""
        return render_template('index.html')

    @app.route("/task2")
    def task_2():
        """Render the task 2 page from the values returned by ``task_two``."""
        context = task_two(data)
        return render_template('task2.html', **context)

    @app.route("/task2-with-graphs")
    def task_2_with_graphs():
        """Render the task 2 page variant built by ``graph_process``."""
        context = graph_process(data)
        return render_template('task2_graph.html', **context)

    @app.route('/<path:filename>')
    def download_file(filename):
        """Send ``filename`` from the graph directory as an attachment."""
        return send_from_directory(graph_dir, filename, as_attachment=True)

    app.run()
Пример #3
0
def run_model(embedding_path, input_path, output_path, model):
    """Run ``model`` on the test inputs and write one prediction per line.

    The preprocessed inputs are cached as a pickle at ``TESTING_DATA``.  When
    the cache cannot be loaded they are rebuilt with ``preprocessing`` and the
    cache is rewritten.

    Each output line is ``<true|false>``, a tab, and ``k / 5.0``, where ``k``
    is the index of the largest of the first six scores predicted for that
    sample; indices below 3 are written as ``true``.

    :param embedding_path: forwarded to ``preprocessing`` on a cache miss.
    :param input_path: forwarded to ``preprocessing`` on a cache miss.
    :param output_path: text file to write the predictions to.
    :param model: object supporting ``.to(device)`` and being called as
        ``model(x0, x1)``.
    """
    try:
        with open(TESTING_DATA, 'rb') as f:
            # NOTE(review): unpickling runs arbitrary code; this is only safe
            # as long as the cache file is produced locally by this function.
            x0, x1 = pickle.load(f)
    except Exception:
        # Missing or unreadable cache: rebuild it.  ``Exception`` (instead of
        # a bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
        x0, x1, _ = preprocessing(embedding_path, input_path, testing=True)
        with open(TESTING_DATA, 'wb') as f:
            pickle.dump((x0, x1), f)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    x0 = x0.to(device)
    x1 = x1.to(device)
    y_pred = model(x0, x1)

    with open(output_path, 'w') as f:
        for y in y_pred:
            scores = [y[j].item() for j in range(6)]
            # First index of the maximum.  A fixed "-1" starting value would
            # leave the index at -1 whenever every score is <= -1.
            max_k = max(range(6), key=scores.__getitem__)
            f.write('true\t' if max_k < 3 else 'false\t')
            # str() is required: concatenating a float with '\n' raises
            # TypeError.
            f.write(str(max_k / 5.0) + '\n')
def classify_rnn2(model_path, text, dp):
    """Classify ``text`` with the RNN whose weights are stored at ``model_path``.

    :param model_path: passed to ``rnn`` as ``pretrained_weights``.
    :param text: raw text, handed to ``preprocessing``.
    :param dp: path of a pickle holding a mapping with the keys ``'vocab'``,
        ``'categories'`` and ``'max_sequence'``.
    :return: the value of ``read_output`` for the model output and the
        category index mapping.
    """
    # The context manager closes the file even if unpickling fails.
    # NOTE(review): unpickling runs arbitrary code; ``dp`` must be trusted.
    with open(dp, 'rb') as f:
        p = pickle.load(f)

    # Sorting makes the index assignment independent of container order.
    vocab = sorted(p['vocab'])
    categories = sorted(p['categories'])

    # Word indices start at 1; index 0 is left unused by this mapping.
    v = {w: i + 1 for i, w in enumerate(vocab)}
    c = {w: i for i, w in enumerate(categories)}

    # Next multiple of 10 strictly above the stored maximum length.
    ms = p['max_sequence']
    ms = 10 * int(ms / 10) + 10

    model = rnn((ms,), len(v) + 1, ms, pretrained_weights=model_path)

    x = preprocessing(text)
    x = compute_input(x, v, ms)
    # NOTE(review): 5430 is hard-coded; presumably it has to equal ``ms`` for
    # the pickled data this was written against -- confirm.
    x = np.reshape(x, (1, 5430))

    y = model.predict_on_batch(x)

    return read_output(y, c)
def perform_inference(exec_net, request_type, input_image, input_shape):
    '''
    Performs inference on the input image given the Executable Network.

    :param exec_net: executable network; the first key of ``exec_net.inputs``
        is used as the input blob name.
    :param request_type: ``'a'`` for asynchronous or ``'s'`` for synchronous
        inference (case-insensitive).
    :param input_image: path of the image, read with ``cv2.imread``.
    :param input_shape: ``(n, c, h, w)``; only ``h`` and ``w`` are used.
    :return: the result of ``async_inference`` or ``sync_inference``.
    :raises ValueError: if ``request_type`` is neither ``'a'`` nor ``'s'``.
    '''
    # Validate up front: otherwise an unknown type would only fail at the
    # final ``return`` with an unbound local, after the image was already
    # read and preprocessed.
    request_type = request_type.lower()
    if request_type not in ('a', 's'):
        raise ValueError(
            "Unknown inference request type, use either 'a' or 's'")

    # Read the input image.
    image = cv2.imread(input_image)
    # Extract the spatial size expected by the network.
    n, c, h, w = input_shape

    # Call preprocessing from utils.
    preprocessed_image = preprocessing(image, h, w)

    input_blob = next(iter(exec_net.inputs))

    # Perform either async or sync inference.
    if request_type == 'a':
        return async_inference(exec_net, input_blob, preprocessed_image)
    return sync_inference(exec_net, input_blob, preprocessed_image)
Пример #6
0
def predict():
    """Load the saved model and plot its per-day predictions on the test set.

    For every date in the test index one sample is predicted, the label
    scaling is inverted and the differencing is undone by adding the raw
    label of the previous day.  True and predicted values are printed,
    collected in a DataFrame and plotted.
    """
    model = load_model(config.model_file)
    # Validate the model.
    data = utils.load_data()
    label_scaler, _, _, test_X, test_y, test_data = utils.preprocessing(data)
    test_dates = test_data.index
    pred_daily_df = pd.DataFrame(index=test_dates,
                                 columns=['True Value', 'Pred Value'])

    for position, day in enumerate(test_dates):
        sample = test_X[position].reshape(1, config.time_step,
                                          test_X.shape[2])
        scaled_pred = model.predict(sample, batch_size=config.batch_size)[0]
        # Invert the scaling to restore the value range.
        column = scaled_pred.reshape(-1, 1)
        diff_pred = label_scaler.inverse_transform(column)[0, 0]

        # Invert the differencing: add the true label of the previous day.
        day_before = day - pd.DateOffset(days=1)
        previous_label = data.loc[day_before][config.raw_label_col]
        predicted = diff_pred + previous_label
        actual = test_data.loc[day][config.raw_label_col] + previous_label

        # Store the values.
        pred_daily_df.loc[day, 'Pred Value'] = predicted
        pred_daily_df.loc[day, 'True Value'] = actual
        print('Date={}, 真实值={}, 预测值={}'.format(day, actual, predicted))

    pred_daily_df.plot()
    plt.show()
def predictImage(img):
    """Classify the largest cropped sign found in ``img`` and display it.

    The second element returned by ``utils.getCroppedImages`` is always
    shown in the "Original image" window.  If crops were found, the largest
    one is resized to 32x32, preprocessed and classified with ``model2``;
    class and probability are printed, and when the probability exceeds
    ``threshold`` they are also drawn on a 256x256 copy shown in the
    "Result" window.
    """
    detection = utils.getCroppedImages(img)
    candidates = detection[0]
    cv2.imshow("Original image", detection[1])
    if not candidates:
        return

    largest_sign = utils.findLargestCropped(candidates)
    sample = cv2.resize(np.asarray(largest_sign), (32, 32))
    sample = utils.preprocessing(sample)
    sample = sample.reshape(1, 32, 32, 1)

    # Predict.
    predictions = model2.predict(sample)
    classIndex = model2.predict_classes(sample)
    probability = np.amax(predictions)
    percent_text = str(round(probability * 100, 2)) + "%"

    print(str(classIndex) + " " + str(utils.getCalssName(classIndex)))
    print(percent_text)
    resized = cv2.resize(largest_sign, (256, 256),
                         interpolation=cv2.INTER_AREA)

    if not probability > threshold:
        return

    # Annotate the enlarged crop with class and confidence.
    class_text = str(classIndex) + " " + str(utils.getCalssName(classIndex))
    cv2.putText(resized, class_text, (8, 25), font, 0.5, (0, 255, 0), 2,
                cv2.LINE_AA)
    cv2.putText(resized, percent_text, (16, 45), font, 0.5, (0, 255, 0), 2,
                cv2.LINE_AA)

    cv2.imshow("Result", resized)
Пример #8
0
def fetch():
    """Pick a random training image and prepare it for display.

    The image is converted to greyscale, run through ``preprocessing`` and
    resized to ``(IMAGE_WIDTH, IMAGE_HEIGHT)``.

    :return: ``(image_path, photo)`` where ``photo`` is an
        ``ImageTk.PhotoImage`` of the processed image.
    """
    # random.choice picks a valid element; random.randint(0, len(seq)) has an
    # inclusive upper bound and could index one past the end.
    image_path = random.choice(TRAINS_GROUP)
    image = Pil_Image.open(image_path).convert('L')
    captcha_image = preprocessing(np.array(image), BINARYZATION, SMOOTH, BLUR)
    captcha_image = Pil_Image.fromarray(captcha_image)
    captcha_image = captcha_image.resize((IMAGE_WIDTH, IMAGE_HEIGHT))
    return image_path, ImageTk.PhotoImage(captcha_image)
Пример #9
0
    def _parse(self, file):
        """Read the sample named ``file`` and return its preprocessed values.

        The first line of ``<train_txt_dir>/<file>.txt`` is split on commas
        and every field is converted to ``np.float32`` before being handed
        to ``preprocessing``.

        :param file: sample identifier; ``str(file)`` is the file stem.
        :return: whatever ``preprocessing`` returns for the parsed values.
        """
        with open(train_txt_dir + os.sep + str(file) + '.txt', 'r') as f:
            fields = f.readline().rstrip('\n').split(',')

        # Build a real list: on Python 3 ``map`` yields a one-shot iterator
        # that supports neither ``len()`` nor indexing.
        values = [np.float32(field) for field in fields]

        return preprocessing(values)
Пример #10
0
def fetch():
    """Pick a random training image and prepare it for display.

    The image is run through ``preprocessing`` and resized to ``RESIZE``
    when that is set, otherwise to ``(IMAGE_WIDTH, IMAGE_HEIGHT)``.

    :return: ``(image_path, photo)`` where ``photo`` is an
        ``ImageTk.PhotoImage`` of the processed image.
    """
    # random.choice picks a valid element; random.randint(0, len(seq)) has an
    # inclusive upper bound and could index one past the end.
    image_path = random.choice(TRAINS_GROUP)
    image = Pil_Image.open(image_path)
    captcha_image = preprocessing(image, BINARYZATION, SMOOTH, BLUR,
                                  IMAGE_ORIGINAL_COLOR, INVERT)
    captcha_image = Pil_Image.fromarray(captcha_image)
    target_size = RESIZE if RESIZE else (IMAGE_WIDTH, IMAGE_HEIGHT)
    captcha_image = captcha_image.resize(target_size)
    return image_path, ImageTk.PhotoImage(captcha_image)
Пример #11
0
 def __getitem__(self, idx):
     """Return batch ``idx`` as a pair of float32 arrays ``(inputs, labels)``.

     The file names of the batch are sliced from ``self.image_filenames``
     and ``self.label_names``, loaded with ``utils.read_data`` and run
     through ``utils.preprocessing``; everything preprocessing yields for
     an item is appended to the batch.
     """
     start = idx * self.batch_size
     stop = (idx + 1) * self.batch_size

     input_paths = self.image_filenames[start:stop]
     raw_inputs = utils.read_data(path_list=input_paths)
     label_paths = self.label_names[start:stop]
     raw_labels = utils.read_data(path_list=label_paths)

     inputs = []
     labels = []
     for position in range(len(input_paths)):
         processed_input = utils.preprocessing(raw_inputs[position])
         processed_label = utils.preprocessing(raw_labels[position])
         inputs.extend(processed_input)
         labels.extend(processed_label)

     inputs_np = np.array(inputs, dtype=np.float32)
     labels_np = np.array(labels, dtype=np.float32)
     return inputs_np, labels_np
Пример #12
0
def run_train(sess, train_file):
    """Training the model

    Reads the sensor log ``train_file`` (CSV with one header line), builds
    the sample index sequence with ``preprocessing`` and runs
    ``FLAGS.total_epoch`` epochs of the ops in the ``'train_ops'``
    collection, printing the average cost after each epoch.

    :param sess: session whose ``run`` executes the training ops.
    :param train_file: path of the CSV file; columns 0, 1, 5, 6 and 7 of
        every row are used.
    """
    sensor_data = []
    sensor_loc = []

    with open(train_file, "r") as f:
        f.readline()  # skip the header line
        reader = csv.reader(f, delimiter=",")
        for row in reader:
            x_location = int(row[1]) // 15 + 2
            sensor_data.append(
                [int(row[0]),
                 float(row[5]),
                 float(row[6]),
                 float(row[7])])  # Trial, V, Grad, Ref_force
            sensor_loc.append(loc2array(FLAGS.num_class, x_location))

    train_seq = preprocessing(sensor_data)
    total_batch = np.shape(train_seq)[0] // FLAGS.batch_size

    # Convert the rows once up front instead of once per batch.
    sensor_array = np.array(sensor_data)
    loc_input = sensor_array[:, (1, 2)]  # V, gradV
    voltage = sensor_array[:, 1]  # V
    sensor_loc = np.array(sensor_loc)  # Location of pressure
    sensor_output = sensor_array[:, 3]  # Ref_Force

    X = tf.get_collection('input')[0]
    V = tf.get_collection('input')[1]
    L = tf.get_collection('ground_truth')[0]
    Y = tf.get_collection('ground_truth')[1]
    # Loop-invariant, so looked up once.
    train_ops = tf.get_collection('train_ops')

    for epoch in range(1, FLAGS.total_epoch + 1):
        avg_cost = 0.

        # Training step
        for i in range(total_batch):
            data_idxs = train_seq[i * FLAGS.batch_size:(i + 1) *
                                  FLAGS.batch_size]
            # One row per sample: the indices of the seq_length samples
            # ending at that sample, oldest first.
            seq_idxs = np.array([
                data_idxs - n for n in reversed(range(0, FLAGS.seq_length))
            ]).T

            seq_x = np.reshape(loc_input[seq_idxs],
                               [-1, FLAGS.seq_length, FLAGS.input_dim])
            v = np.reshape(voltage[data_idxs], [-1, 1])
            seq_l = np.reshape(sensor_loc[data_idxs], [-1, FLAGS.num_class])
            seq_y = np.reshape(sensor_output[data_idxs], [-1, 1])

            _, _cost = sess.run(train_ops,
                                feed_dict={
                                    X: seq_x,
                                    V: v,
                                    L: seq_l,
                                    Y: seq_y
                                })
            avg_cost += _cost / total_batch
        print("Epoch: {}, Cost: {:.4}".format(epoch, avg_cost))
    print("Localization - Optimization Finished!")
Пример #13
0
def main():
    """
    Main function

    Usage: ``spgk.py <filenamepos> <filenameneg> <windowsize> <depth>``.

    Loads and preprocesses the positive and negative documents, labels them
    1 and 0 respectively, builds the graph-of-words representation and the
    kernel matrix, and runs the k-fold evaluation.  With a wrong number of
    command-line arguments only the usage line is printed.
    """
    # Single-argument print() calls produce the same output on Python 2
    # and Python 3.
    if len(sys.argv) != 5:
        print('spgk.py <filenamepos> <filenameneg> <windowsize> <depth>')
        return

    filename_pos = sys.argv[1]
    filename_neg = sys.argv[2]
    window_size = int(sys.argv[3])
    depth = int(sys.argv[4])

    docs_pos = preprocessing(load_file(filename_pos))
    labels_pos = [1] * len(docs_pos)

    docs_neg = preprocessing(load_file(filename_neg))
    labels_neg = [0] * len(docs_neg)

    # Positive documents first, then negative ones; labels follow the
    # same order.
    docs = docs_pos
    docs.extend(docs_neg)
    labels = np.array(labels_pos + labels_neg)

    vocab = set()
    for doc in docs:
        vocab.update(doc)

    # Two spaces after the colon reproduce the output of the former
    # ``print "...: ", value`` statement.
    print("\nVocabulary size:  {}".format(len(vocab)))

    graphs = create_graphs_of_words(docs, window_size)
    K = build_kernel_matrix(graphs, depth)

    learn_model_and_predict_k_fold(K, labels)
Пример #14
0
def Dot_to_Data(dot, cwe_label, label):
    """
    Converts an obj:'pydot.dot' to a class: 'torch_geometric.data.Data' instance

    Every node gets a 128-dimensional feature vector: the mean, over all
    tokens of its label, of the word2vec vectors of the tokens known to
    ``w2vmodel`` (numeric tokens contribute 1000 times their offset from the
    vector of ``'16'``).

    :param dot: pydot graph
    :param cwe_label: CWE***
    :param label: 1-good, 0-bad
    :return: torch_geometric.data.Data, or -1 when the graph has fewer than
        two nodes
    """
    node_list = dot[0].get_nodes()
    if len(node_list) < 2:
        return -1

    # Name -> position lookup, built once instead of scanning the node list
    # for every edge.
    node_position = {
        node.get_name(): position
        for position, node in enumerate(node_list)
    }

    edge_i = []
    for edge in dot[0].get_edges():
        src = edge.get_source().split(":")[0]  # keep the part before ':'
        dst = edge.get_destination()
        if src not in node_position or dst not in node_position:
            # An endpoint that is not a listed node cannot be indexed.
            # Skip the edge instead of silently reusing the ids resolved
            # for a previous edge.
            continue
        edge_i.append([node_position[src], node_position[dst]])
    edge_index = torch.tensor(edge_i).t().contiguous()

    features = []
    for node in node_list:
        value = node.obj_dict['attributes']['label']
        tokens = utils.preprocessing(value)
        tokenvec = np.zeros((128, ), dtype=float)
        for token in tokens:
            if token not in w2vmodel.wv:
                continue
            if is_number(token):
                tokenvec += 1000 * (w2vmodel.wv[token] - w2vmodel.wv['16'])
            else:
                tokenvec += w2vmodel.wv[token]
        token_number = len(tokens)
        if token_number:
            # Mean over all tokens; guarded so that a label without tokens
            # yields a zero vector instead of NaNs from dividing by zero.
            tokenvec = tokenvec / token_number
        features.append(tokenvec)

    data = Data.from_dict({
        "x": torch.tensor(np.array(features)),
        "edge_index": edge_index.view(2, -1).long(),
    })
    data.num_nodes = len(node_list)
    data.x = data.x.float()
    data.y = torch.tensor([label], dtype=torch.long)
    data['cwe'] = cwe_label

    return data
Пример #15
0
def main():
    """Compare the cepstrum-pitch model with a random forest classifier.

    Builds the dataset, scores the predictions of ``utils.model2``, then
    trains a ``RandomForestClassifier`` on an 80/20 split of the remaining
    columns and prints the accuracy of both approaches.

    NOTE(review): the keyword ``random_sate`` is passed as spelled here; it
    has to match the parameter name of ``utils.build_dataset`` -- confirm.
    """
    print('Creating the dataset...')
    dataset = utils.build_dataset(wav_number=50, random_sate=43)

    # Baseline: row-wise prediction of the cepstrum-pitch model.
    dataset['prediction'] = dataset.apply(utils.model2, axis=1)
    acc = utils.accuracy(dataset['speaker'], dataset["prediction"])
    print(f'The cepstrum-pitch-based model has {acc*100:.2f}% accuracy.')

    # Remove the columns that are not used as features.
    feature_frame = dataset.drop(['fs', 'duration', 'prediction'], axis=1)
    train_part, test_part = train_test_split(feature_frame,
                                             test_size=0.2,
                                             random_state=42)
    X_train, y_train = utils.preprocessing(train_part)
    X_test, y_test = utils.preprocessing(test_part)

    forest = RandomForestClassifier(random_state=42)
    forest.fit(X_train, y_train)
    predicted = forest.predict(X_test)

    acc = utils.accuracy(y_test, predicted)
    print(f'The machine learning based model has {acc * 100:.2f}% accuracy.')
Пример #16
0
def create_name2feature():
    """Compute a HOG feature per named image and pickle the mapping.

    Reads the file-name -> name mapping from ``data/fname2name.json``, loads
    each file from the ``img`` directory, preprocesses it, and stores the
    resulting ``name -> feature`` dictionary in ``data/name2feature.pkl``.
    """
    print('create_name2feature')
    with open('data/fname2name.json') as mapping_file:
        fname2name = json.load(mapping_file)

    features_by_name = {}
    for fname, name in tqdm(fname2name.items()):
        image_path = os.path.join('img', fname)
        image = preprocessing(cv2.imread(image_path, 1))
        features_by_name[name] = calc_hog_feature(image)

    with open('data/name2feature.pkl', 'wb') as output_file:
        pickle.dump(features_by_name, output_file)
Пример #17
0
def fetch():
    """Pick a random training image and prepare it for display.

    Images with more than three bands are first pasted onto a white RGB
    background, using the image itself as the paste mask.  The image is
    then converted to greyscale, run through ``preprocessing`` and resized
    to ``(IMAGE_WIDTH, IMAGE_HEIGHT)``.

    :return: ``(image_path, photo)`` where ``photo`` is an
        ``ImageTk.PhotoImage`` of the processed image.
    """
    # random.choice picks a valid element; random.randint(0, len(seq)) has an
    # inclusive upper bound and could index one past the end.
    image_path = random.choice(TRAINS_GROUP)
    image = Pil_Image.open(image_path)
    bands = image.split()
    size = image.size
    if len(bands) > 3:
        background = Pil_Image.new('RGB', size, (255, 255, 255))
        background.paste(image, (0, 0, size[0], size[1]), image)
        image = background
    image = image.convert('L')
    captcha_image = preprocessing(np.array(image), BINARYZATION, SMOOTH, BLUR)
    captcha_image = Pil_Image.fromarray(captcha_image)
    captcha_image = captcha_image.resize((IMAGE_WIDTH, IMAGE_HEIGHT))
    return image_path, ImageTk.PhotoImage(captcha_image)
Пример #18
0
 def _suck(self, color, depth):
     """Return the suction score map for one colour/depth frame.

     The frame is preprocessed into tensors, passed through
     ``self.suck_net``, and element ``[0, 1]`` of the prediction is
     enlarged by ``self.scale`` in both dimensions.  Pixels where the
     foreground mask from ``utils.background_subtraction`` is 0 are set
     to 0.
     """
     foreground = utils.background_subtraction(
         color, depth, self.background_color, self.background_depth)

     tensors = utils.preprocessing(color, depth)
     color_tensor, depth_tensor = tensors
     if self.use_cuda:
         color_tensor = color_tensor.cuda()
         depth_tensor = depth_tensor.cuda()

     prediction = self.suck_net.forward(color_tensor, depth_tensor)
     score_map = prediction.detach().cpu().numpy()[0, 1]

     # dsize is built from shape[1] first, then shape[0].
     enlarged_size = (score_map.shape[1] * self.scale,
                      score_map.shape[0] * self.scale)
     score_map = cv2.resize(score_map, dsize=enlarged_size)
     score_map[foreground == 0] = 0.0  # Background
     return score_map
Пример #19
0
def train():
    """Train the LSTM model, report its test score and save it.

    The data is loaded and split by ``utils``; the model is fitted without
    shuffling, using the test split as validation data, evaluated on the
    test split and written to ``config.model_file``.
    """
    dataset = utils.load_data()
    splits = utils.preprocessing(dataset)
    label_scaler, train_X, train_y, test_X, test_y, _ = splits

    # The feature count is the size of the third axis of the training input.
    feature_count = train_X.shape[2]
    builder = LSTM_Model(feature_count, config.time_step, config.batch_size,
                         config.hidden_num)
    network = builder.build()

    network.fit(train_X,
                train_y,
                validation_data=(test_X, test_y),
                shuffle=False,
                epochs=config.nb_epoch,
                batch_size=config.batch_size)

    test_mse = network.evaluate(test_X, test_y, batch_size=config.batch_size)
    print('Test mse: {}'.format(test_mse))
    network.save(config.model_file)
def calssify(text: str, estimator: DummyClassifier, counter: CountVectorizer,
             tfidf_matrix: TfidfTransformer) -> str:
    '''
    Classify raw text into one of the revolico categories.

    :param text: raw text, not preprocessed
    :type text: str
    :param estimator: classifier to use
    :param counter: fitted count vectorizer applied to the tokenised text
    :param tfidf_matrix: fitted tf-idf transformer applied to the counts
    :return: the result of ``estimator.predict`` for this single sample
        (NOTE(review): presumably a one-element array holding the category
        rather than a bare ``str`` -- confirm against callers)
    :rtype: str
    '''

    tok_text: List[str] = utils.preprocessing(text)
    # The vectorizer expects an iterable of documents, hence the
    # one-element list.
    count_vec = counter.transform([' '.join(tok_text)])
    # ``count_vec`` is already a (1, n_features) matrix; it is passed on
    # directly, since wrapping it in another list does not give the
    # transformer or the estimator a valid 2-D input.
    tfidf_vec = tfidf_matrix.transform(count_vec)

    return estimator.predict(tfidf_vec)
def classify_rnn1(model, text: List[str]) -> str:
    """Tag ``text`` with ``model`` using the stored word and tag indices.

    The preprocessed tokens are mapped through the ``./word2index`` lookup
    (a token missing from it raises ``KeyError``), padded or truncated at
    the end to 5423 entries and passed to ``model.predict``.  The
    prediction is converted with ``to_binary`` and ``to_tags``.
    """
    word2index = joblib.load('./word2index')
    tag2index = joblib.load('./tag2index')

    tokens = preprocessing(text)

    encoded = []
    for word in tokens:
        encoded.append(word2index[word])

    sequence_length = 5423
    padded = pad_sequences(sequences=[encoded], maxlen=sequence_length,
                           padding='post', truncating='post', value=0)

    scores = model.predict(padded)

    binary = to_binary(scores)
    return to_tags(binary, tag2index)
Пример #22
0
 def test_preprocessing(self):
     """tests function that removes physically impossible data"""
     filename = 'mol_res_scan_results_7.csv'
     cwd = os.getcwd()
     data1 = utils.load_data(cwd, filename, filepath=['tests'])
     data_try = utils.preprocessing([data1],
                                    bounds={
                                        'yield': [0, 1],
                                        'purity': [0, 1]
                                    })[0]
     # Assert on the row counts directly.  Going through a flag that is only
     # assigned on success would fail with an unbound local instead of the
     # assertion message.
     assert data1.shape[0] > data_try.shape[0], \
         "problem removing out of bounds data"
Пример #23
0
def get_predict():
    """Return the per-class probabilities for the submitted news text.

    The request is validated by ``theme_validate_predict``; validation
    errors produce a 400 response.  Otherwise the text is preprocessed,
    vectorised with the tf-idf vectorizer of the requested model and scored
    by every estimator of that model, one per label class.
    """
    (payload, errors) = theme_validate_predict()
    if errors:
        return resp(400, {'errors': errors})

    model_id = payload['model_id']
    news_text = payload['news_text']
    probabilities = {}
    vectorizer = TFIDF_VECTORIZERS[model_id]
    model_estimators = PREDICTORS[model_id]
    model_classes = CLASSES[model_id]
    print('env for prediction loaded')

    processed_text = utils.preprocessing(news_text, [])
    features = vectorizer.transform([processed_text])
    print('prediction text processed')

    for label_class, est in zip(model_classes, model_estimators):
        # Column 1 of predict_proba is kept for each class.
        positive_proba = est.predict_proba(features)[:, 1]
        probabilities[str(label_class)] = positive_proba.tolist()

    return resp(200, {
        'model': model_id,
        'classes probabilities': probabilities
    })
Пример #24
0
    def select_img(self, img, offset, icon_size, num_slide, slide_size):
        """Return the best ``(name, feature, distance)`` match over all windows.

        A ``num_slide`` x ``num_slide`` grid of windows, starting at
        ``offset`` and spaced ``slide_size`` apart, is clipped from ``img``.
        For each window the known feature with the smallest L1 distance to
        the window's HOG feature is kept, and the closest of those
        per-window winners is returned.
        """

        def distance_of(entry):
            # Entries are (name, feature, distance) tuples.
            return entry[2]

        winners = []
        for step_x, step_y in product(range(num_slide), range(num_slide)):
            start_x = offset[0] + step_x * slide_size
            start_y = offset[1] + step_y * slide_size
            window = clip_img(img, start_x, start_y, icon_size, icon_size)
            window = preprocessing(window)
            target_feature = calc_hog_feature(window)

            matches = []
            for name, feature in self.name2feature.items():
                distance = np.linalg.norm(target_feature - feature, ord=1)
                matches.append((name, feature, distance))

            matches.sort(key=distance_of)
            winners.append(matches[0])

        winners.sort(key=distance_of)
        return winners[0]
def run_train(sess, train_file):
    """Training the model

    Reads the sensor log ``train_file`` (CSV with one header line), builds
    the sample index sequence with ``preprocessing`` and runs
    ``FLAGS.total_epoch`` epochs of the ops in the ``'train_ops'``
    collection, printing the average cost after each epoch.

    :param sess: session whose ``run`` executes the training ops.
    :param train_file: path of the CSV file; columns 0, 1, 5, 6 and 7 of
        every row are used.
    """
    sensor_data = []
    sensor_loc = []

    with open(train_file, "r") as f:
        f.readline()  # skip the header line
        reader = csv.reader(f, delimiter=",")
        for row in reader:
            x_location = int(row[1]) // 15 + 2
            sensor_data.append([int(row[0]), float(row[5]), float(row[6]), float(row[7])])  # Trial, V, Grad, Ref_force
            sensor_loc.append(loc2array(FLAGS.num_class, x_location))

    train_seq = preprocessing(sensor_data)
    total_batch = np.shape(train_seq)[0] // FLAGS.batch_size

    # Convert the rows once up front instead of once per batch.
    sensor_array = np.array(sensor_data)
    loc_input = sensor_array[:, (1, 2)]  # V, gradV
    voltage = sensor_array[:, 1]  # V
    sensor_loc = np.array(sensor_loc)  # Location of pressure
    sensor_output = sensor_array[:, 3]  # Ref_Force

    X = tf.get_collection('input')[0]
    V = tf.get_collection('input')[1]
    L = tf.get_collection('ground_truth')[0]
    Y = tf.get_collection('ground_truth')[1]
    # Loop-invariant, so looked up once.
    train_ops = tf.get_collection('train_ops')

    for epoch in range(1, FLAGS.total_epoch+1):
        avg_cost = 0.

        # Training step
        for i in range(total_batch):
            data_idxs = train_seq[i * FLAGS.batch_size:(i + 1) * FLAGS.batch_size]
            # One row per sample: the indices of the seq_length samples
            # ending at that sample, oldest first.
            seq_idxs = np.array([data_idxs - n for n in reversed(range(0, FLAGS.seq_length))]).T

            seq_x = np.reshape(loc_input[seq_idxs], [-1, FLAGS.seq_length, FLAGS.input_dim])
            v = np.reshape(voltage[data_idxs], [-1, 1])
            seq_l = np.reshape(sensor_loc[data_idxs], [-1, FLAGS.num_class])
            seq_y = np.reshape(sensor_output[data_idxs], [-1, 1])

            _, _cost = sess.run(train_ops, feed_dict={X: seq_x, V: v, L: seq_l, Y: seq_y})
            avg_cost += _cost / total_batch
        print("Epoch: {}, Cost: {:.4}".format(epoch, avg_cost))
    print("Localization - Optimization Finished!")
Пример #26
0
def test_embedding():
    """Print the pretrained-embedding output for the first test sentence.

    The sentence is embedded once with the default layer and once with
    ``bow=False``; the second result is summed over axis 1 and divided by
    the sentence length before it is printed.
    """
    config = configparser.ConfigParser()
    config.read(global_config_path)
    general = config["GENERAL"]
    stopword_path = general["stop_word_path"]
    datapath = general["test_path"]
    pretrain_path = config["WORD_EMBED"]["pretrain_path"]

    x, _ = utils.preprocessing(datapath)
    vo, sents = utils.create_vocab(x, stopword_path)
    vecs = utils.word2vec(vo, sents)
    print(sents[0])

    first_sentence = vecs[0]

    bow_layer = wordEmbed.PreTrainEmbedding(
        vo, pretrain_embedding_path=pretrain_path)
    print(bow_layer.forward(torch.LongTensor([first_sentence])))

    # mock bag of word
    plain_layer = wordEmbed.PreTrainEmbedding(
        vo, pretrain_embedding_path=pretrain_path, bow=False)
    per_word = plain_layer.forward(torch.LongTensor([first_sentence]))
    averaged = torch.sum(per_word, 1) / len(first_sentence)
    print(averaged)
Пример #27
0
def lexrank(raw_sent, word_embeddings, glove_dim):
    """
    Inspired by pagerank, lexrank considers each sentence as a node,
    give the weight to each edge by calculating cosine_similarity.
    And rank sentences by their score (which can represent the importance but also repeatability...).
    :param raw_sent: processed sentences in WPs.
    :param word_embeddings: word vectors by glove.
    :param glove_dim: the dimension of each word vector.
    :return: a list of ranked sentences(index 1) with their score(index 0).
    """
    num_sent = len(raw_sent)
    # preprocessing
    processed_sentences = preprocessing(raw_sent)
    # word and sentence representation
    unknown_word = np.zeros((glove_dim, ))
    sentence_vectors = []
    for sentence in processed_sentences:
        words = sentence.split()
        if words:
            # Approximate mean of the word vectors; the 0.001 in the
            # denominator is kept from the original formula.
            v = sum([word_embeddings.get(w, unknown_word)
                     for w in words]) / (len(words) + 0.001)
        else:
            # Empty *or whitespace-only* sentence: testing the split result
            # (not the raw length) avoids a scalar 0.0 that would break the
            # reshape below.
            v = np.zeros((glove_dim, ))
        sentence_vectors.append(v)
    # create a matrix
    sim_mat = np.zeros([num_sent, num_sent])
    # initial sim_mat by cosine_similarity
    for i in range(num_sent):
        for j in range(num_sent):
            if i != j:
                sim_mat[i][j] = cosine_similarity(
                    sentence_vectors[i].reshape(1, glove_dim),
                    sentence_vectors[j].reshape(1, glove_dim))[0, 0]
    # turn to graph
    nx_graph = nx.from_numpy_array(sim_mat)
    # pagerank algorithm
    scores = nx.pagerank(nx_graph)
    # ranked sentence
    ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(raw_sent)),
                              reverse=True)
    return ranked_sentences
Пример #28
0
def prepare_data(root):
    """Append one labelled feature row per image under ``root`` to ``train_foo.csv``.

    Every sub-directory found while walking ``root`` is treated as a class
    folder: its name is looked up in ``mapping`` to obtain the label, and
    each file in it is read, preprocessed, segmented, resized and flattened
    into a single CSV row ``label, pixel values...``.

    :param root: directory whose sub-directories hold the images.
    """
    for directory, subdirectories, _ in os.walk(root):
        print('directory', directory)
        for subdirectory in subdirectories:
            # Join with the directory currently being walked: below the top
            # level, ``root + "/" + subdirectory`` names a folder that does
            # not exist.
            folder = os.path.join(directory, subdirectory)
            print("folder", folder)
            for file_name in os.listdir(folder):
                image = cv2.imread(os.path.join(folder, file_name))
                image = utils.preprocessing(image)
                image = utils.segmenting(image)

                image = utils.resizing(image)
                value = image.flatten()
                value = value.astype(float)
                label = mapping[subdirectory]
                value = np.hstack((label, value))
                df = pd.DataFrame(value).T
                # NOTE(review): this frame has a single row, so sampling it
                # does not reorder anything; shuffling the dataset has to
                # happen when the CSV is read back.
                df = df.sample(frac=1)

                with open('train_foo.csv', 'a') as dataset:
                    df.to_csv(dataset, header=False, index=False)
Пример #29
0
def gen_probabilities(image):
    """Call PyTorch ResNet + Fine-tuned net.

    @param image: RGB face image.
    @return probability vector of size 23.
    """
    # Bring the image into standard form: PIL RGB image -> preprocessed
    # tensor with a leading batch axis.
    pil_image = Image.fromarray(image).convert('RGB')
    batch = preprocessing(pil_image).unsqueeze(0)
    # NOTE(review): ``volatile=True`` is the pre-0.4 PyTorch way to disable
    # gradient tracking -- confirm the installed version still honours it.
    batch = Variable(batch, volatile=True)

    # ResNet embedding, with axis 2 squeezed twice.
    features = embedder(batch)
    features = features.squeeze(2).squeeze(2)

    # The model output is exponentiated to obtain probabilities.
    return torch.exp(model(features))
Пример #30
0
def num_recognition(img, model):
    """
    Classify a single image with ``model``.

    :param img: image; its maximum value is printed, then it is passed
        through ``preprocessing``
    :param model: object whose ``predict`` receives the image with a
        trailing axis and a leading axis added
    :return: ``np.argmax`` of the prediction
    """
    print(np.max(img))

    prepared = preprocessing(img)

    # Add a trailing axis, then a leading one.
    with_last_axis = np.expand_dims(prepared, -1)
    batch = np.expand_dims(with_last_axis, 0)

    scores = model.predict(batch)
    return np.argmax(scores)
Пример #31
0
    def fetch_cosmo(self):
        '''
        End to end function to fetch data from cosmopolitan sitemap all the way to counting blog score from each blog, storing it in a dataframe and calculating
        the sum across all the blogs.
        Returns:
            df : pandas.DataFrame - Dataframe containing each fashion attribute in the set self.category and their corresponding total blog score 
        '''
        # Fetch all URLS from the cosmo sitemap
        scraper = cosmoscraper
        urls = scraper.get_urls(scraper)

        # Get the corpus of all the blogs in URLs
        corpus = []
        for url in urls:
            try:
                txt = scraper.get_txt_from_blog(scraper, url)
            except Exception:
                # Best effort: a blog that cannot be fetched or parsed is
                # skipped.  ``Exception`` (not a bare ``except``) keeps
                # KeyboardInterrupt/SystemExit working.
                continue
            corpus.append(txt)

        # Initializing the dataframe for self.category
        df = initialize_df(self.path_to_json, self.category)

        # Get the normalized word frequency
        for text_string in corpus:
            count_tf_from_text(df, self.category, preprocessing(text_string))

        # Store the sum of the blog score
        df["cosmo"] = df.sum(axis=1)

        # Drop all the columns containing individual blog score (their
        # names end in a digit), in a single call.
        per_blog_columns = [
            column for column in df.columns if column[-1].isdigit()
        ]
        df = df.drop(per_blog_columns, axis=1)

        return df
Пример #32
0
 def _grasp(self, color, depth):
     """Return the heightmaps and one grasp affordance map per angle.

     The colour/depth frame is turned into heightmaps, which are rotated
     through ``self.grasp_angle`` evenly spaced angles.  For every rotation
     element ``[0, 1]`` of the network prediction is enlarged by
     ``self.scale`` and zeroed wherever the rotated depth heightmap is 0.

     NOTE(review): the result array is allocated as
     ``(angles, shape[1], shape[0])`` of the depth heightmap -- confirm
     that this matches the shape of the resized prediction.
     """
     color_heightmap, depth_heightmap = utils.generate_heightmap(
         color, depth, self.camera_info, self.background_color,
         self.background_depth, self.voxel_size)

     result_shape = (self.grasp_angle, depth_heightmap.shape[1],
                     depth_heightmap.shape[0])
     graspable = np.zeros(result_shape)

     for step in range(self.grasp_angle):
         angle = -np.degrees(np.pi / self.grasp_angle * step)
         rotated = utils.rotate_heightmap(color_heightmap, depth_heightmap,
                                          angle)
         rotated_color_heightmap, rotated_depth_heightmap = rotated

         tensors = utils.preprocessing(rotated_color_heightmap,
                                       rotated_depth_heightmap)
         color_tensor, depth_tensor = tensors
         if self.use_cuda:
             color_tensor = color_tensor.cuda()
             depth_tensor = depth_tensor.cuda()

         prediction = self.grasp_net.forward(color_tensor, depth_tensor)
         grasp = prediction.detach().cpu().numpy()[0, 1]

         enlarged_size = (grasp.shape[1] * self.scale,
                          grasp.shape[0] * self.scale)
         affordance = cv2.resize(grasp, dsize=enlarged_size)
         affordance[rotated_depth_heightmap == 0] = 0.0  # Background
         graspable[step, :, :] = affordance

     return color_heightmap, depth_heightmap, graspable