Example #1
    def __init__(self, vec_dim, dim, options={}):
        """
        Instantiate the Layer (which will later be used to connect tensors)

        Parameters
        ----------
        vec_dim : int
            Width of the vector generated by the vectorizer.

        dim : int
            Width of vectors in the rest of the network.
        """
        super(StackedBiLSTM, self).__init__()
        
        # Embedder (mostly to upscale the incoming character vectors)
        self.embedder     = Embedder(vec_dim, dim)
        
        # Left to right Stacked LSTM
        self.left2rightA  = LSTMCell(dim, options=options)            # Outputs two length-dim vectors
        self.left2rightB  = LSTMCell(dim, options=options)            # Outputs two length-dim vectors
        
        # Right to left Stacked LSTM
        self.right2leftA  = LSTMCell(dim, options=options)            # Outputs two length-dim vectors
        self.right2leftB  = LSTMCell(dim, options=options)            # Outputs two length-dim vectors

        # Output decision layer
        self.splitLayer  = SplitLayer(dim, options=options)      # Outputs a one-dimensional length-2 vector
        
        self.Lzeros      = pu.var_zeros(dim, ttype=options.get('ttype'))    # Zeros for initialization from left
        self.Lhzeros     = pu.var_zeros(dim, ttype=options.get('ttype'))    # Zeros for initialization (hidden layer)
        self.Rzeros      = pu.var_zeros(dim, ttype=options.get('ttype'))    # same, for the right
        self.Rhzeros     = pu.var_zeros(dim, ttype=options.get('ttype'))
        self.training    = False   # By default, but set to True during training with 'train()'
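A minimal instantiation sketch for the layer above; the sizes and the options value are illustrative assumptions, not taken from the source:

# hypothetical sizes: 64-dim character vectors upscaled to a 128-dim network
net = StackedBiLSTM(vec_dim=64, dim=128, options={'ttype': None})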
Example #2
def calc_embeddings(docs: List[str], batch_size: int, root: str) -> np.ndarray:
    """Calculate embeddings (in batches).

    Args:
        docs: List of document filenames.
        batch_size: Batch size.
        root: Root directory.

    Returns:
        Numpy array of shape (N, 768) with text embeddings.

    """
    embedder = Embedder()
    all_embeddings = np.zeros((len(docs), 768), dtype=np.float32)

    # number of batches: equivalent to ceil(len(docs) / batch_size)
    iters = len(docs) // batch_size
    if len(docs) % batch_size > 0:
        iters += 1

    for i in trange(iters):
        batch = docs[i * batch_size:(i + 1) * batch_size]
        filenames = [os.path.join(root, doc) for doc in batch]
        texts = [get_text_reduced(x, maxlen=512) for x in filenames]
        embeddings = embedder.embed(texts)
        all_embeddings[i * batch_size:(i + 1) * batch_size] = embeddings
    return all_embeddings
Example #3
def build_model(config, src_field, tgt_field):
    embed_dim = config.embed_dim
    hidden_dim = config.hidden_dim
    num_layers = config.num_layers
    dropout_p = config.dropout_p
    attention_type = config.attention
    beam_width = config.beam_width

    model = Seq2Seq(
        LstmEncoder(Embedder(src_field, embed_dim, dropout_p), hidden_dim,
                    num_layers, dropout_p),
        LstmDecoder(Embedder(tgt_field, embed_dim, dropout_p), hidden_dim,
                    num_layers, dropout_p,
                    build_attention(hidden_dim, attention_type), beam_width),
    )
    return model
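A minimal usage sketch, assuming a plain namespace carrying the config fields read above; the values and the attention name are illustrative, and src_field/tgt_field come from the surrounding data pipeline:

from types import SimpleNamespace

config = SimpleNamespace(embed_dim=256, hidden_dim=512, num_layers=2,
                         dropout_p=0.3, attention='dot', beam_width=5)
model = build_model(config, src_field, tgt_field)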
Example #4
    def __init__(self, env, loss_fn, optim, gym=gym, preprocess_func=None,
                 utility_func=None, embed=False, flatten=False, frameskip=1,
                 batch_size=32, cuda=False, ale=False, init_skip=0,
                 mid_skip=0, caution=0.0, render_skip=1):

        self.n_agents = 1

        if utility_func is None:
            raise Exception("Need utility function")
        if preprocess_func is None:
            raise Exception("Need preprocess function")
        if self.n_agents < 1:
            raise Exception("Need at least 1 agent.")
        if self.n_agents > 1:
            raise NotImplementedError

        super(Coach, self).__init__()
        print("Initializing Coach")

        self.ale = ale
        self.name = env
        self.gym = gym
        self.dones = [False for _ in range(self.n_agents)]
        self.length = 0
        self.memory = Memory(self.n_agents)
        self.episode = []
        self.embedder = Embedder()
        self.embedding = embed
        self.flatten = flatten
        self.best_episode = []
        self.p_obs = torch.zeros(1)
        self.values = []
        self.frameskip = frameskip
        self.loss_fn = loss_fn
        self.optim = optim
        self.preview = False
        self.single_step = False
        self.norm = True
        self.render = False
        self.debug_render = False
        self.caution = caution  # 0.95
        self.life = -1
        self.init_skip = init_skip  # 70
        self.render_frameskip = render_skip
        self.mid_skip = mid_skip  # 50
        self.steps = 0
        self.batch_size = batch_size
        self.preprocess_frame = preprocess_func
        self.update = utility_func
        self.cuda = cuda

        atexit.register(self.end)
        # create environments
        self.envs = [self.gym.make(self.name) for _ in range(self.n_agents)]
        self.env = 0

        self.actions = self.envs[0].action_space.n

        os.system("toilet {}".format(self.name))
Example #5
def build_model(cfg, char_voca, word_voca=None, gazet=None, pos_voca=None):
    """Build Neural Network based Ner model (Embedder + Classifier)"""

    # Build Embedder
    embedder = Embedder(window=cfg.window,
                        char_voca=char_voca,
                        word_voca=word_voca,
                        jaso_dim=cfg.jaso_dim,
                        char_dim=cfg.char_dim,
                        word_dim=cfg.word_dim,
                        gazet=gazet,
                        gazet_embed=True,
                        pos_enc=True,
                        phoneme=True,
                        pos_voca_size=len(pos_voca),
                        pos_dim=cfg.pos_dim)

    print('Total Embedding_size: ', embedder.embed_dim)

    encoder_name, decoder_name = cfg.model_name.lower().split('-')

    # Build Encoder
    if encoder_name == 'fnn5':
        encoder = models.Fnn5(context_len=cfg.context_len,
                              in_dim=embedder.embed_dim,
                              hidden_dim=cfg.hidden_dim)
    elif encoder_name == 'cnn7':
        encoder = models.Cnn7(in_dim=embedder.embed_dim,
                              hidden_dim=cfg.hidden_dim)
    elif encoder_name == 'cnn8':
        encoder = models.Cnn8(context_len=cfg.context_len,
                              in_dim=embedder.embed_dim,
                              hidden_dim=cfg.hidden_dim)
    elif encoder_name in ['gru', 'lstm', 'sru']:
        encoder = models.RnnEncoder(context_len=cfg.context_len,
                                    in_dim=embedder.embed_dim,
                                    out_dim=cfg.hidden_dim,
                                    cell=encoder_name)
    else:
        raise ValueError('unknown model name: %s' % cfg.model_name)

    # Build Decoder
    if decoder_name.lower() == 'fc':
        decoder = models.FCDecoder(in_dim=encoder.out_dim,
                                   hidden_dim=cfg.hidden_dim,
                                   n_tags=cfg.n_tags)
    elif decoder_name in ['gru', 'lstm', 'sru']:
        decoder = models.RnnDecoder(in_dim=encoder.out_dim,
                                    hidden_dim=cfg.hidden_dim,
                                    n_tags=cfg.n_tags,
                                    num_layers=cfg.num_layers,
                                    cell=decoder_name)
    else:
        raise ValueError('unknown decoder name: %s' % decoder_name)

    model = models.Ner(embedder, encoder, decoder)

    return model
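The encoder and decoder are selected by splitting cfg.model_name on '-', so a valid name pairs one encoder key with one decoder key, for example:

cfg.model_name = 'cnn7-fc'    # Cnn7 encoder + fully-connected decoder
cfg.model_name = 'lstm-lstm'  # RnnEncoder and RnnDecoder, both with LSTM cells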
Example #6
def main():
    args = arg_parse()
    docs = [
        os.path.join(d, f) for d in os.listdir(args.root)
        for f in os.listdir(os.path.join(args.root, d))
    ]
    docs = sorted(docs)

    index = Indexer(docs, args.index, args.root)
    embedder = Embedder()

    # L0: boolean retrieval over the inverted index, ranked by tf-idf
    q_pos, q_neg = query_expand(args.query)
    # AND together all positive tokens
    q_pos_expand = re.sub(r" ", " AND ", q_pos)
    hits = index.query_boolean(q_pos_expand.split())
    # Remove all NOT-ed tokens
    if q_neg:
        for token in q_neg.split():
            term = index.stemmer.stem(token)
            try:
                not_posting = index.tfidf(index.index[term])
            except KeyError:
                not_posting = []
            hits = not_and_postings(not_posting, hits)

    if not hits:
        print("nothing found")
        return

    hits = sorted(hits, key=lambda item: item[1], reverse=True)
    hits = hits[:args.l0_size]

    # L1: re-rank the top L0 hits by embedding cosine similarity
    doc_ids = [x[0] for x in hits]
    filenames = [os.path.join(args.root, docs[i]) for i in doc_ids]
    texts = [get_text_reduced(x, maxlen=512) for x in filenames]

    if args.batch_size >= args.l0_size:
        embeddings = embedder.embed(texts)
    else:
        embeddings = batch_embed(embedder, texts, args.batch_size)
    query_emb = embedder.embed([q_pos])[0]
    dist_cos = [cosine(query_emb, e) for e in embeddings]
    idx_cos = np.argsort(dist_cos)

    # Render
    q_red = query_reduce(args.query)
    resorted = [doc_ids[i] for i in idx_cos]
    for i, doc_id in enumerate(resorted[:args.l1_size]):
        print("\n{}:".format(i))
        index.render_file(q_red.split(), docs[doc_id])
        orig_pos = idx_cos[i]
        print("\tL0 rank = {}; tf-idf = {:.3f}; cos-sim = {:.3f}".format(
            orig_pos, hits[orig_pos][1], 1 - dist_cos[orig_pos]))
Example #7
    def __init__(self,
                 trained_model_PATH,
                 vocabulary='./XX/vocabulary.pkl',
                 tag='./XX/tags.pkl',
                 status='raw'):
        self.trained_model_PATH = trained_model_PATH
        self.vocabulary = vocabulary
        self.tag = tag
        self.model = None
        self.embedding = Embedder(vocabulary, tag)
        self.loadmodel()
Example #8
    def __init__(self, vec_dim, dim, options={}):
        """
        Instantiate the object and set options
        """
        print("options:", options)
        self.set_options(options, default)
        options['ttype'] = self.get('ttype')
        self.validation_graph = None
        # self.bilstm           = StackedBiLSTM(vec_dim, dim, options=options)         # A Bi-directional LSTM
        self.bilstm = SimpleBiLSTM(dim,
                                   options=options)  # A Bi-directional LSTM
        self.embedder = Embedder(
            vec_dim, dim, options=options
        )  # Embedder (mostly for upscaling incoming vectors)
        self.class_weights = pu.torchvar(self.get('class_weights'),
                                         ttype=self.get('ttype'))

        # Loss functions
        self.coef1 = pu.torchvar([0.1])
        self.coef2 = pu.torchvar([0.2])
        self.coef3 = pu.torchvar([0.3])
        self.coef4 = pu.torchvar([0.4])
        self.accuracyLoss = pu.AccuracyLoss()
        self.skewLoss = pu.SkewedL1Loss(self.class_weights)
        self.criterion_CE = nn.CrossEntropyLoss(weight=self.class_weights,
                                                size_average=False)
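        # size_average=False is deprecated in newer PyTorch; reduction='sum' is the equivalent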
        self.criterion_MSE = nn.MSELoss()
        self.orig_loss = None

        if torch.cuda.is_available():
            self.bilstm = self.bilstm.cuda()
            self.coef1 = self.coef1.cuda()
            self.coef2 = self.coef2.cuda()
            self.coef3 = self.coef3.cuda()
            self.coef4 = self.coef4.cuda()
            self.accuracyLoss = self.accuracyLoss.cuda()
            self.skewLoss = self.skewLoss.cuda()
            self.criterion_CE = self.criterion_CE.cuda()
            self.criterion_MSE = self.criterion_MSE.cuda()

        self.eval_mode()  # eval mode by default
Example #9
def main():
    embedder = Embedder()
    parser = DocParser()
    # iterate through grobid
    with open('grobid_data.pkl', 'wb') as output:
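        # Doc objects are pickled back-to-back into one file; read them with repeated pickle.load calls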
        for subdir, dirs, files in os.walk(grobid_path):
            print(len(files))
            count = 0
            for file in files:
                print(count)
                count += 1
                # print(os.path.join(subdir, file))

                doc = parser.parseXML(os.path.join(subdir, file))
                doc.id = str(file).split('.')[0]
                if len(doc.abstract) == 0:
                    continue
                doc.embedding = embedder.embed(doc.abstract)
                # pair = variablesFromPair((doc.abstract, doc.title), word_index, embedding_map)
                # if (len(pair[0]) == 0 or len(pair[1]) == 0):
                #     continue
                # doc.embedding = encode(encoder, pair[0])
                pickle.dump(doc, output, pickle.HIGHEST_PROTOCOL)
Example #10
    # epochs for METN pre-training
    if args.property == 'QED':
        embedder_epochs_num = 1
    elif args.property == 'DRD2':
        embedder_epochs_num = 12
    else:
        print("property must be 'QED' or 'DRD2'")
        exit()

    if args.conditional or args.no_pre_train:
        embedder_epochs_num = 0

    if args.is_CDN:
        _, _, boundaries = create_dataset(args.property, rebuild_dataset=False)
        dataset_CDN = Dataset('dataset/' + args.property + '/CDN/CDN')
        model_CDN = Embedder(dataset_CDN, 'CDN', args).to(device)
        embedder_epochs_num = args.epochs
        fit(args, model_CDN, embedder_epochs_num, boundaries, is_validation=True)
        exit()

    # prepare dataset
    dataset_file_A, dataset_file_B, boundaries = create_dataset(args.property, args.rebuild_dataset)
    dataset_A = Dataset(dataset_file_A, use_atom_tokenizer=args.tokenize, isB=False)
    dataset_B = Dataset(dataset_file_B, use_atom_tokenizer=args.tokenize, isB=True)

    # create and pre-train the embedders (METNs)
    model_A = Embedder(dataset_A, 'Embedder A', args).to(device)
    fit(args, model_A, embedder_epochs_num, boundaries, is_validation=True)
    model_B = Embedder(dataset_B, 'Embedder B', args).to(device)
    fit(args, model_B, embedder_epochs_num, boundaries, is_validation=False)
Example #11
from detectors import DetectorSSD, DetectorVJ, DetectorHOG, DetectorMMOD, DetectorLBP
from embedder import Embedder
from recognizer import Recognizer
import recognize_process as rp

detector_vj = DetectorVJ()
detector_ssd = DetectorSSD()
detector_hog = DetectorHOG()
detector_mmod = DetectorMMOD()
detector_lbp = DetectorLBP()
embedder = Embedder()
recognizer = Recognizer()


# compute ROI regions on the dataset with every detector
def calc_all_detectors():

    print('Haar detector calculating...')
    detector_vj.calc_dataset()

    print('SSD detector calculating...')
    detector_ssd.calc_dataset()

    print('HOG detector calculating...')
    detector_hog.calc_dataset()

    print('MMOD detector calculating...')
    detector_mmod.calc_dataset()

    print('LBP detector calculating...')
    detector_lbp.calc_dataset()
Example #12
        for part in parts:
            if len(part.split()) > 1:
                # camel-case the trailing words of each multi-word part into a label
                labels.append(part.split()[0] +
                              ''.join(''.join([w[0].upper(), w[1:].lower()])
                                      for w in part.split()[1:]))
        if useSynonyms:
            predicates = [max(part.split(), key=len) for part in parts]
            if predicates:
                for predicate in predicates:
                    for part in list(parts):
                        if predicate in part:
                            for syn in gloveModel.gloveModel.most_similar(
                                    predicate.lower()):
                                parts.append(part.replace(predicate, syn[0]))
        if len(parts) == 0:
            resultsExists = False
            parts = list(splitter.split(gen_question, min=minLen, max=maxLen))
    # embed each part with the GloVe model
    vectors = []
    for part in parts:
        vectors.append(gloveModel.getVector(part))
    return vectors, parts, pos, gen_question, labels, resultsExists


# ===== main testing =====
if __name__ == "__main__":
    pp(
        processQuestion(Embedder('../glove.6B.50d.txt'),
                        'who was named as president of the USA',
                        useAPI=True,
                        useSynonyms=True))
Example #13
    def __init__(self):
        self.messenger = Messenger()
        self.embedder = Embedder()
Example #14
CARRIER = "../Carrier2.jpg"
BIOMETRIC = "../FingerPrint.png"
EXPECTED = "./Expected.png"

biometric = cv2.imread(BIOMETRIC)
carrier = cv2.imread(CARRIER)
expected = cv2.cvtColor(cv2.imread(EXPECTED), cv2.COLOR_BGR2GRAY)

BIOMETRIC_SIZE = biometric.shape[0]
actualSize = carrier.shape
corners = [[0, 0], [0, 0], [0, 0], [0, 0]]

embedded = carrier.copy()

em = Embedder(PASSES, KEY, CHANNELS, DEPTH, MASK)
ex = Extractor(PASSES, KEY, ~MASK, SHIFT)

em_psnr = []
ex_psnr = []
for iteration in range(ITERATIONS):
    embedded = em.embed(biometric, embedded)
    filename = './CarrierVsRecoved/Iteration_' + str(iteration) + '.jpg'
    cv2.imwrite(filename, embedded)
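    # re-reading the written JPEG folds its lossy compression into the next pass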
    embedded = cv2.imread(filename)
    em_psnr.append(cv2.PSNR(carrier, embedded))

    extracted = ex.extract(embedded, corners, actualSize, BIOMETRIC_SIZE)
    ex_psnr.append(cv2.PSNR(expected, extracted))
    print(iteration, em_psnr[-1], ex_psnr[-1])
Example #15
    def __init__(self, cfg):
        super(GenericModel, self).__init__()
        self.cfg = cfg
        self.embedder = Embedder(cfg['embedder'])
        self.reasoner = Reasoner(cfg['reasoner'])
Example #16
carrier = gl.washBitDepth(carrier, ~GUIDE_MASK)
carrier = gl.generateGuideLines(carrier, GUIDE_MASK)

x1 = int(CARRIER_SIZE[0] / 2) - int(CARRIER_SIZE[0] / 4)
x2 = int(CARRIER_SIZE[0] / 2) + int(CARRIER_SIZE[0] / 4)

y1 = int(CARRIER_SIZE[1] / 2) - int(CARRIER_SIZE[1] / 4)
y2 = int(CARRIER_SIZE[1] / 2) + int(CARRIER_SIZE[1] / 4)
crop = (x1, y1, x2, y2)

untouched = carrier[crop[0]:crop[2], crop[1]:crop[3]]

em_psnr = []
ex_psnr = []
for passes in range(MAX_PASSES):
    em = Embedder(passes, KEY, CHANNELS, DEPTH, MASK)
    ex = Extractor(passes, KEY, ~MASK, SHIFT)

    embedded = em.embed(biometric, carrier)

    cropped = embedded[crop[0]:crop[2], crop[1]:crop[3]]
    margins = gl.getMargins(cropped, GUIDE_MASK)
    corners = gl.marginsToCorners(margins)
    actualSize = gl.getActualSize(cropped, margins)

    extracted = ex.extract(cropped, corners, actualSize, BIOMETRIC_SIZE)
    ex_psnr.append(cv2.PSNR(expected, extracted))
    em_psnr.append(cv2.PSNR(np.uint8(cropped), np.uint8(untouched)))
    print(passes, em_psnr[-1], ex_psnr[-1])

cv2.imwrite('./final.png', extracted)
Example #17
class PositionalEncoder(nn.Module):
    def __init__(self, d_model, max_seq_len):
        # opening lines reconstructed from the usage below; pe holds the sin/cos table
        super().__init__()
        self.d_model = d_model
        pe = torch.zeros(max_seq_len, d_model)
        for pos in range(max_seq_len):
            for i in range(0, d_model, 2):
                pe[pos, i] = math.sin(pos / (10000**((2 * i) / d_model)))
                pe[pos, i + 1] = math.cos(pos / (10000**((2 * (i + 1)) / d_model)))

        self.pe = pe.unsqueeze(0)
        self.pe.requires_grad = False

    def forward(self, x) -> torch.Tensor:
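        # scale the embeddings by sqrt(d_model) before adding the fixed positional encodings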
        ret = math.sqrt(self.d_model) * x + self.pe
        return ret


if __name__ == "__main__":
    train_dl, val_dl, test_dl, TEXT = get_IMDb_Dataloaders_and_TEXT(
        max_length=256, batch_size=24)
    batch = next(iter(train_dl))

    net1 = Embedder(TEXT.vocab.vectors)
    net2 = PositionalEncoder(d_model=300, max_seq_len=256)

    with timer("forward"):
        x = batch.Text[0]
        x1 = net1(x)
        x1 = x1.to(device)
        x2 = net2(x1)

    assert x1.shape == torch.Size([24, 256, 300])
    assert x2.shape == torch.Size([24, 256, 300])
Example #18
    def __init__(
            self,
            params,
            input_vocabulary,
            output_vocabulary,
            anonymizer):
        self.params = params

        self._pc = dy.ParameterCollection()

        if params.new_version:
            self.controller = Controller(output_vocabulary)
        else:
            self.controller = None
        # Create the input embeddings
        self.input_embedder = Embedder(self._pc,
                                       params.input_embedding_size,
                                       name="input-embedding",
                                       vocabulary=input_vocabulary,
                                       anonymizer=anonymizer)

        # Create the output embeddings
        self.output_embedder = Embedder(self._pc,
                                        params.output_embedding_size,
                                        name="output-embedding",
                                        vocabulary=output_vocabulary,
                                        anonymizer=anonymizer)

        # Create the encoder
        encoder_input_size = params.input_embedding_size
        if params.discourse_level_lstm:
            encoder_input_size += int(params.encoder_state_size / 2)  # int keeps the size integral under Python 3

        self.utterance_encoder = Encoder(params.encoder_num_layers,
                                         encoder_input_size,
                                         params.encoder_state_size,
                                         self._pc)

        # Positional embedder for utterances
        attention_key_size = params.encoder_state_size
        if params.state_positional_embeddings:
            attention_key_size += params.positional_embedding_size
            self.positional_embedder = Embedder(
                self._pc,
                params.positional_embedding_size,
                name="positional-embedding",
                num_tokens=params.maximum_utterances)

        # Create the discourse-level LSTM parameters
        if params.discourse_level_lstm:
            self.discourse_lstms = du.create_multilayer_lstm_params(
                1, params.encoder_state_size, int(params.encoder_state_size / 2),
                self._pc, "LSTM-t")
            self.initial_discourse_state = du.add_params(self._pc, tuple(
                [int(params.encoder_state_size / 2)]), "V-turn-state-0")

        # Snippet encoder
        final_snippet_size = 0
        if params.use_snippets and not params.previous_decoder_snippet_encoding:
            snippet_encoding_size = int(params.encoder_state_size / 2)
            final_snippet_size = params.encoder_state_size
            if params.snippet_age_embedding:
                snippet_encoding_size -= int(
                    params.snippet_age_embedding_size / 4)
                self.snippet_age_embedder = Embedder(
                    self._pc,
                    params.snippet_age_embedding_size,
                    name="snippet-age-embedding",
                    num_tokens=params.max_snippet_age_embedding)
                final_snippet_size = params.encoder_state_size \
                    + int(params.snippet_age_embedding_size / 2)

            self.snippet_encoder = Encoder(params.snippet_num_layers,
                                           params.output_embedding_size,
                                           snippet_encoding_size,
                                           self._pc)
        token_predictor = construct_token_predictor(self._pc,
                                                    params,
                                                    output_vocabulary,
                                                    attention_key_size,
                                                    final_snippet_size,
                                                    anonymizer)

        # note: the decoder input is widened here by decoder_state_size dimensions
        self.decoder = SequencePredictor(
            params,
            params.output_embedding_size +
            attention_key_size + params.decoder_state_size,
            self.output_embedder,
            self._pc,
            token_predictor)

        self.trainer = dy.AdamTrainer(
            self._pc, alpha=params.initial_learning_rate)
        self.dropout = 0.
Example #19
def make_dataset(images_path='dataset'):

    detector = DetectorSSD()

    embedder = Embedder()

    images_path = images_path.replace('/', os.path.sep)

    ind_image_paths = list(paths.list_images(images_path))

    # lists of faces and names, aligned by index
    known_ems = []
    known_names = []

    for (i, image_path) in enumerate(ind_image_paths):
        print(i + 1)
        # extract the person's name from the file path
        name = image_path.split(os.path.sep)[-2]

        image = cv2.imread(image_path)

        # ORIGINAL IMAGE
        temp_img = image.copy()
        face = detector.calc_image(temp_img)[0]

        embeddings = embedder.calc_face(face)
        known_ems.append(embeddings.flatten())
        known_names.append(name)

        # DARKER: 4 levels
        for level in range(1, 5):
            temp_img = image.copy()
            face = detector.calc_image(ie.darken(temp_img, level / 10))
            if len(face) > 0:
                embeddings = embedder.calc_face(face[0])
                known_ems.append(embeddings.flatten())
                known_names.append(name)

        # BRIGHTER: 4 levels
        for level in range(1, 5):
            temp_img = image.copy()
            face = detector.calc_image(ie.brighten(temp_img, level / 10))
            if len(face) > 0:
                embeddings = embedder.calc_face(face[0])
                known_ems.append(embeddings.flatten())
                known_names.append(name)

        # RAIN: 3 types (drizzle, heavy, torrential)
        temp_img = image.copy()
        face = detector.calc_image(ie.add_rain(temp_img, rain_type='drizzle'))
        if len(face) > 0:
            embeddings = embedder.calc_face(face[0])
            known_ems.append(embeddings.flatten())
            known_names.append(name)

        temp_img = image.copy()
        face = detector.calc_image(ie.add_rain(temp_img, rain_type='heavy'))
        if len(face) > 0:
            embeddings = embedder.calc_face(face[0])
            known_ems.append(embeddings.flatten())
            known_names.append(name)

        temp_img = image.copy()
        face = detector.calc_image(
            ie.add_rain(temp_img, rain_type='torrential'))
        if len(face) > 0:
            embeddings = embedder.calc_face(face[0])
            known_ems.append(embeddings.flatten())
            known_names.append(name)

        # FOG: 4 levels
        for level in range(1, 5):
            temp_img = image.copy()
            face = detector.calc_image(ie.add_fog(temp_img, level / 10))
            if len(face) > 0:
                embeddings = embedder.calc_face(face[0])
                known_ems.append(embeddings.flatten())
                known_names.append(name)

    embeddings = {'embeddings': known_ems, 'names': known_names}
    with open('weather_ems/ems_weather_4.pickle', 'wb') as f:
        f.write(pickle.dumps(embeddings))

    print(len(known_ems), len(known_names))
    return known_ems, known_names
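A minimal call sketch; 'dataset' is the default images directory the function above already assumes:

ems, names = make_dataset('dataset')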