def test_image(args, model):
    """Run hair/face segmentation on a single image file.

    Args:
        args: Parsed command-line options. Reads ``detector``, ``gpu``,
            ``image``, ``resize``, ``remove_small_area``, ``save`` and
            ``unshow``.
        model: Network handed to ``Segmenter``; switched to eval mode here.

    The segmented image is written to ``args.save`` when that option is set,
    and both the result and the input image are displayed unless
    ``args.unshow`` is set.
    """
    backend = args.detector
    use_dlib = backend == 'dlib'
    use_faceboxes = (not use_dlib) and backend == 'faceboxes'

    # Import only the detector back-end that was requested.
    if use_dlib:
        import dlib
    elif use_faceboxes:
        from utils.face_detector import FaceDetectorFaceboxes

    model.eval()
    device = torch.device("cuda" if args.gpu else "cpu")

    source_img = Image.open(args.image).convert('RGB')
    if args.resize > 0:
        source_img = resize(source_img, args.resize)

    # Build the face detector matching the selected back-end (None otherwise).
    if use_dlib:
        detector = dlib.get_frontal_face_detector()
    elif use_faceboxes:
        weights_path = 'model/faceboxes.pb'
        detector = FaceDetectorFaceboxes(
            weights_path, gpu_memory_fraction=0.25, visible_device_list='0')
    else:
        detector = None

    segmenter = Segmenter(model, device, detector, mode=backend)

    # The segmenter works on OpenCV images; convert in and back out.
    segmented = segmenter.segment(PIL2opencv(source_img),
                                  args.remove_small_area)
    segmented = opencv2PIL(segmented)

    if args.save:
        segmented.save(args.save)

    if args.unshow:
        return
    segmented.show()
    source_img.show()
def test_video(args, model): if args.video == '0': cap = cv2.VideoCapture(0) else: cap = cv2.VideoCapture(args.video) w_win = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h_win = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) print(w_win, h_win) if args.resize > 0: short_size = args.resize if w_win > h_win: nw, nh = short_size, int(w_win * short_size / h_win) else: nw, nh = int(h_win * short_size / w_win), short_size else: nw, nh = w_win, h_win detector = None if args.detector == 'dlib': detector = dlib.get_frontal_face_detector() elif args.detector == 'faceboxes': MODEL_PATH = 'model/faceboxes.pb' detector = FaceDetectorFaceboxes(MODEL_PATH, gpu_memory_fraction=0.25, visible_device_list='0') device = torch.device("cuda" if args.gpu else "cpu") segmenter = Segmenter(model, device, detector, mode=args.detector) if args.save: fourcc = cv2.VideoWriter_fourcc(*'MJPG') out = cv2.VideoWriter(args.save, fourcc, 20, (nh, nw), True) while True: frame = cap.read()[1] if frame is None: break frame = cv2.resize(frame, (nh, nw)) result = segmenter.segment(frame, args.remove_small_area) if args.save: out.write(result) if not args.unshow: cv2.imshow('image', result) if cv2.waitKey(1) & 0xFF == ord('q'): break cap.release() cv2.destroyAllWindows() if args.save: out.release()
if mode == 1: yield (X_batch, y_batch) else: yield X_batch # очищаем матрицы порции для новой порции X_batch.fill(0) y_batch.fill(0) batch_index = 0 w2v = gensim.models.KeyedVectors.load_word2vec_format( word2vector_path, binary=not word2vector_path.endswith('.txt')) w2v_dims = len(w2v.syn0[0]) segmenter = Segmenter() tokenizer = Tokenizer() print('Collecting samples...') samples = [] all_words = set([PAD_WORD]) max_phrase_len = 0 if True: # добавляем пары предпосылка-вопрос из обучающего датасета with codecs.open(os.path.join(data_folder, qa_path), "r", "utf-8") as inf: loading_state = 'T' text = []
sys.stdout.encoding).strip().lower() phrases1 = [] segm_mode = raw_input( 'Use EOL markers (1) or segmenter (2) to split file to sentences?' ).strip() max_nb_facts = int( raw_input( 'maximum number of samples to read from file (-1 means all):\n> '). strip()) if max_nb_facts == -1: max_nb_facts = 10000000 if segm_mode == 2: segmenter = Segmenter() phrases0 = segmenter.split( codecs.open(path1, 'r', 'utf-8').readlines()) for phrase in enumerate(phrases): words = tokenizer.tokenize(phrase) if len(words) > 0: phrases1.append(words) if len(phrases1) >= max_nb_facts: break else: with codecs.open(path1, 'r', 'utf-8') as rdr: for phrase in rdr: words = tokenizer.tokenize(phrase) if len(words) > 0: phrases1.append(words) if len(phrases1) >= max_nb_facts:
# Created on: 00:59:12 import os import sys import numpy as np import gensim from copy import deepcopy sys.path.append( os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) from config import Config from utils import log from utils.segmenter import Segmenter from utils.tfidf import word_idf, sif_embedding, get_weighted_average from utils.tools import load_embedding, cosine config = Config() cut = Segmenter() logger = log.getLogger(__name__) class Embedding(object): def __init__(self, embedding_path, documents, data_seg): # default pre-trained word embedding self._word_embedding = gensim.models.KeyedVectors.load_word2vec_format( embedding_path, binary=True) # Topic clusters documents self.documents = documents self.data_seg = data_seg self.weights_of_words = word_idf(self.documents) # initized some components for sentence embedding self.word2idx, self.wv_mat = None, None self._word2vec()