def compute_both_neighbours(chosen, path_composed_emb, path_observed_emb):
    """Return the neighbours of the composed/observed representations of the
    chosen words in an observed space."""
    original_nearest_neighbours = {}
    composed_nearest_neighbours = {}
    observed_space = Word2VecKeyedVectors.load_word2vec_format(
        path_observed_emb, binary=False)
    observed_space.vectors = normalize(observed_space.vectors, norm="l2", axis=1)
    composed_space = Word2VecKeyedVectors.load_word2vec_format(
        path_composed_emb, binary=False)
    composed_space.vectors = normalize(composed_space.vectors, norm="l2", axis=1)
    chosen_words = set(tup[0] for tup in chosen)
    composed_words = composed_space.vocab
    observed_words = observed_space.vocab
    for word, rank in chosen:
        original_vec = observed_space.get_vector(word)
        composed_vec = composed_space.get_vector(word)
        # Vectors are L2-normalized, so the dot product is the cosine similarity.
        original_composed_cosine = np.dot(original_vec, composed_vec)
        # topn=False makes similar_by_vector return the full similarity array.
        sims = observed_space.similar_by_vector(vector=original_vec, topn=False)
        neighbours = [(observed_space.index2word[widx], sims[widx])
                      for widx in range(len(sims))]
        neighbours.append(("%s_c" % word, original_composed_cosine))
        sorted_neighbours = sorted(neighbours, key=lambda tup: tup[1], reverse=True)
        print("neighbours of the original representation of %s" % word)
        c_idx = [idx for idx, tup in enumerate(sorted_neighbours)
                 if tup[0] == "%s_c" % word]
        print(word, original_composed_cosine, c_idx)
        original_nearest_neighbours[word] = sorted_neighbours[:11]
        print(original_nearest_neighbours[word])
        comp_index = c_idx[0]
        # Keep a window of up to 5 neighbours on each side of the composed vector.
        if comp_index >= 5:
            composed_nearest_neighbours["%s_c" % word] = \
                sorted_neighbours[comp_index - 5:comp_index + 6]
        else:
            composed_nearest_neighbours["%s_c" % word] = \
                sorted_neighbours[:comp_index + 6]
        print("neighbours of the composed representation of %s" % word)
        print(composed_nearest_neighbours["%s_c" % word])
    return original_nearest_neighbours, composed_nearest_neighbours
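# Hedged usage sketch (not from the original source): the paths and the
# (word, rank) pairs below are hypothetical; both files are assumed to be
# word2vec text-format embeddings that contain every chosen word.
def _demo_compute_both_neighbours():
    chosen = [("apple_tree", 1), ("bus_stop", 2)]  # hypothetical compounds
    orig_nn, comp_nn = compute_both_neighbours(
        chosen, "composed.vec", "observed.vec")
    print(orig_nn["apple_tree"])    # 11 nearest observed neighbours
    print(comp_nn["apple_tree_c"])  # window around the composed vector's rank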
def read_gensim_model(file_name):
    extension = Path(file_name).suffix
    if extension == '.txt':
        model = Word2VecKeyedVectors.load_word2vec_format(file_name, binary=False)
    elif extension in ('.bin', '.w2v'):
        model = Word2VecKeyedVectors.load_word2vec_format(file_name, binary=True)
    else:
        raise ValueError("unknown extension for embeddings file")
    return model
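# Hedged usage sketch (hypothetical file names): read_gensim_model picks the
# on-disk format from the file extension, so both calls below yield keyed
# vectors as long as the files exist in the corresponding format.
def _demo_read_gensim_model():
    kv_text = read_gensim_model("embeddings.txt")  # word2vec text format
    kv_bin = read_gensim_model("embeddings.bin")   # word2vec binary format
    print(kv_text.most_similar("king", topn=3))
    print(kv_bin.vector_size)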
def get_latent(args):
    print("Loading embedding model...")
    model_name = 'WORD2VEC_' + args.target_dataset + '.model'
    embedding_model = Word2VecKeyedVectors.load(
        os.path.join(CONFIG.EMBEDDING_PATH, model_name))
    print("Loading embedding model completed")
    full_data = []
    df_data = pd.read_csv(
        os.path.join(CONFIG.DATASET_PATH, args.target_dataset, 'posts.csv'),
        header=None, encoding='utf-8-sig')
    short_code_list = []
    row_list = []
    csv_name = 'text_word2vec_' + args.target_dataset + '.csv'
    pbar = tqdm(total=df_data.shape[0])
    for index, row in df_data.iterrows():
        pbar.update(1)
        short_code = row.iloc[0]
        short_code_list.append(short_code)
        text_data = row.iloc[1]
        # full_data.append([text_data, short_code])
        # Average the word vectors of the post to get one 300-d document vector.
        vector_list = []
        for word in text_data.split():
            vector_list.append(embedding_model.get_vector(word))
        vector = np.mean(vector_list, axis=0)
        row_list.append(vector)
        del text_data
    pbar.close()
    result_df = pd.DataFrame(data=row_list, index=short_code_list,
                             columns=[i for i in range(300)])
    result_df.index.name = "short_code"
    result_df.sort_index(inplace=True)
    result_df.to_csv(os.path.join(CONFIG.CSV_PATH, csv_name),
                     encoding='utf-8-sig')
    print("Finish!!!")
def emb_to_gensim(e):
    '''Convert embedding to gensim format.

    Parameters
    ----------
    e: Embedding
        Instance of class Embedding.

    Returns
    -------
    gpairs: Word2VecKeyedVectors
        Embedding in gensim format.
    '''
    rank = np.shape(e.vectors)[1]
    gpairs = GensimPairs(rank)
    gpairs.add([word for word in e.words], [np.array(v) for v in e.vectors])
    return gpairs
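# Hedged note (assumption, not from the original source): GensimPairs is
# defined elsewhere in this project; its call pattern here matches
# Word2VecKeyedVectors, so a stand-in demo could look like this.
def _demo_emb_to_gensim(e):
    kv = emb_to_gensim(e)  # e: an Embedding with .words and .vectors
    kv.save_word2vec_format("converted.bin", binary=True)  # hypothetical path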
def test_doc2vec():
    """Test the performance of doc2vec.

    :return: the computed keywords
    """
    documents = get_documents(cache=True, jieba=True)
    # Load the model; with training=True, continue training it.
    model = train_doc2vec(documents, training=True, epoch=200)
    # Reload the raw documents for printing.
    documents = get_documents(cache=True, jieba=False)
    # Filter out the given finTags keywords that are not in the vocabulary;
    # such words have no word vector, so no similarity can be computed.
    filter_tags = [tag for tag in finTags if tag in model.wv]
    if finTags != filter_tags:
        print('Some of the given finTags keywords are not in the vocabulary '
              'built by doc2vec; please change the keywords or extend the '
              'training documents so that they contain these keywords:',
              set(finTags) - set(filter_tags))
    tagsvec = model.wv[filter_tags]
    keywords = []
    for idx, doc in enumerate(documents):
        docvec = model.docvecs[idx]
        # Compute the similarity between every tag and this document.
        tagssim = Word2VecKeyedVectors.cosine_similarities(docvec, tagsvec)
        maxsim = max(tagssim)
        # Index into filter_tags (not finTags): tagsvec was built from the
        # filtered list, so the similarity indices line up with filter_tags.
        keyword = filter_tags[list(tagssim).index(maxsim)]
        print(f"The closest keyword computed by doc2vec is: {keyword}, "
              f"similarity: {maxsim}, document: {doc}")
        keywords.append(keyword)
    print(keywords)
    return keywords
def load_word2vec_binary(file):
    """Load word2vec embeddings in binary format, as produced by the original C tool.

    :param file: a binary file.
    :return: KeyedVectors
    """
    return Word2VecKeyedVectors.load_word2vec_format(file, binary=True)
def test_persistence_word2vec_format(self):
    """Test storing/loading the model in word2vec format."""
    tmpf = get_tmpfile('gensim_fasttext_w2v_format.tst')
    model = FT_gensim(sentences, min_count=1, size=10)
    model.wv.save_word2vec_format(tmpf, binary=True)
    loaded_model_kv = Word2VecKeyedVectors.load_word2vec_format(tmpf, binary=True)
    self.assertEqual(len(model.wv.vocab), len(loaded_model_kv.vocab))
    self.assertTrue(np.allclose(model['human'], loaded_model_kv['human']))
def load_embedding(embedding_dir):
    from gensim.models.keyedvectors import Word2VecKeyedVectors
    token_file = os.path.join(embedding_dir, "token_list.npy")
    token_list = np.load(token_file)
    vector_file = os.path.join(embedding_dir, "vector_list.npy")
    vector_list = np.load(vector_file)
    model = Word2VecKeyedVectors(vector_list.shape[1])
    model.add(token_list, vector_list)
    # Word2VecKeyedVectors is already the keyed-vectors object (it has no .wv
    # attribute like a full Word2Vec model does), so return it directly.
    return model
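# Hedged usage sketch (hypothetical data): load_embedding expects a directory
# holding parallel token_list.npy / vector_list.npy arrays.
def _demo_load_embedding():
    import tempfile
    d = tempfile.mkdtemp()
    np.save(os.path.join(d, "token_list.npy"), np.array(["cat", "dog"]))
    np.save(os.path.join(d, "vector_list.npy"),
            np.random.rand(2, 50).astype(np.float32))
    kv = load_embedding(d)
    print(kv.most_similar("cat"))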
def _create_keyed_vectors(self) -> KeyedVectors:
    kv = Word2VecKeyedVectors(vector_size=self.vector_dimension)
    # gensim 4.x renamed the vocab mapping to key_to_index.
    if gensim.__version__[0] >= '4':
        kv.key_to_index = self._data().word2idx
    else:
        kv.vocab = _WordEmbedVocabAdapter(self._data())
    kv.vectors = self.matrix
    kv.index2entity = list(self._data().words)
    return kv
def compute_neighbours(chosen, path_composed_emb, path_observed_emb, no_neighbours):
    """Return the neighbours of words from a composed space in an observed space."""
    nearest_neighbours = {}
    observed_space = Word2VecKeyedVectors.load_word2vec_format(
        path_observed_emb, binary=False)
    observed_space.vectors = normalize(observed_space.vectors, norm="l2", axis=1)
    composed_space = Word2VecKeyedVectors.load_word2vec_format(
        path_composed_emb, binary=False)
    composed_space.vectors = normalize(composed_space.vectors, norm="l2", axis=1)
    chosen_words = set(tup[0] for tup in chosen)
    composed_words = composed_space.vocab
    observed_words = observed_space.vocab
    for word, rank in chosen:
        original_vec = observed_space.get_vector(word)
        composed_vec = composed_space.get_vector(word)
        # Vectors are L2-normalized, so the dot product is the cosine similarity.
        original_composed_cosine = np.dot(original_vec, composed_vec)
        neighbours = observed_space.similar_by_vector(vector=original_vec,
                                                      topn=no_neighbours)
        neighbours.append(("%s_c" % word, original_composed_cosine))
        sorted_neighbours = sorted(neighbours, key=lambda tup: tup[1], reverse=True)
        c_idx = [idx for idx, tup in enumerate(sorted_neighbours)
                 if tup[0] == "%s_c" % word]
        print(word, original_composed_cosine, c_idx)
        nearest_neighbours[word] = sorted_neighbours
    return nearest_neighbours
def load_term_embeddings(term_ids: Set[int],
                         emb_path: str,
                         idx_to_term: Dict[int, str]) -> Dict[int, List[float]]:
    """Get all embeddings for the given terms from the given file.

    Args:
        term_ids: The ids of the input terms.
        emb_path: The path to the given embedding file.
        idx_to_term: Maps term_id to term.
    Return:
        A dictionary of the form: {term_id: embedding}
    """
    pck = False
    if emb_path.endswith('.pickle'):
        pck = True
        print(' SPECIAL CASE: load embeddings from pickle...')
        with open(emb_path, 'rb') as f:
            emb_dict = pickle.load(f)
        # *** This code would be used if the local mean were computed.
        # print(' Calculating average embeddings...')
        # model = {}
        # for term_id in emb_dict:
        #     embs = []
        #     for doc_id in emb_dict[term_id]:
        #         embs.extend(emb_dict[term_id][doc_id])
        #     model[term_id] = np.mean(embs, axis=0)
        # ***
        model = {tid: emb for tid, emb in emb_dict.items()}
    else:
        logging.getLogger("gensim.models").setLevel(logging.WARNING)
        logging.getLogger("gensim.scripts.glove2word2vec").setLevel(logging.WARNING)
        logging.getLogger("gensim").setLevel(logging.WARNING)
        print('Load embeddings from:')
        print(emb_path)
        try:
            model = KeyedVectors.load(emb_path)
        except Exception:
            # Avoid a bare except; fall back to word2vec binary format.
            model = Word2VecKeyedVectors.load_word2vec_format(emb_path, binary=True)
    term_id_to_emb = {}
    global_embs_ids = []
    for term_id in term_ids:
        try:
            if pck:
                term_id_to_emb[term_id] = model[term_id]
            else:
                term_id_to_emb[term_id] = model.wv[str(term_id)]
        except KeyError:
            global_embs_ids.append((term_id, idx_to_term[term_id]))
            # term_id_to_emb[term_id] = term_ids_to_embs_global[term_id]
    if global_embs_ids:
        print('WARNING: No embeddings found for:', global_embs_ids)
        print('WARNING: {} terms excluded.'.format(len(global_embs_ids)))
    return term_id_to_emb
def save_gensim_model(words, word_reprs, output_file, binary=True):
    """Save word representations in w2v format. Word order is not preserved."""
    vocab = dict()
    for word in words:
        vocab[word] = Vocab(index=len(vocab))
    model = Word2VecKeyedVectors(word_reprs.shape[1])
    model.vocab = vocab
    model.vectors = word_reprs
    model.save_word2vec_format(fname=output_file, binary=binary)
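# Hedged round-trip sketch (hypothetical data): save a small random embedding
# matrix with save_gensim_model and load it back with gensim.
def _demo_save_gensim_model():
    words = ["cat", "dog", "fish"]
    reprs = np.random.rand(3, 20).astype(np.float32)
    save_gensim_model(words, reprs, "toy.bin", binary=True)
    kv = Word2VecKeyedVectors.load_word2vec_format("toy.bin", binary=True)
    assert np.allclose(kv["cat"], reprs[words.index("cat")], atol=1e-6)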
def __init__(self, filename: str):
    self.model = Word2VecKeyedVectors.load_word2vec_format(filename)
    # Collect ranked list of words in vocab.
    words = self.model.index2word
    w_rank = {}
    for i, word in enumerate(words):
        w_rank[word] = i
    self.words = w_rank
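# Hedged usage sketch (hypothetical class and file names): assuming this
# __init__ belongs to a word-rank helper, self.words maps each word to its
# position in the embedding file (0 = first, typically the most frequent).
def _demo_word_ranks():
    ranker = WordRanker("embeddings.txt")  # WordRanker is a hypothetical name
    print(ranker.words.get("the"))         # e.g. 0 for a very frequent word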
def eval_on_file(path_composed_emb, path_observed_emb, save_path):
    raw_observed_space = Word2VecKeyedVectors.load_word2vec_format(
        path_observed_emb, binary=False)
    targets = read_targets(path_composed_emb)
    raw_composed_space = Word2VecKeyedVectors.load_word2vec_format(
        path_composed_emb, binary=False)
    q1, q2, q3, ranks = evaluateRank(targets, raw_composed_space,
                                     raw_observed_space, 1000)
    print("Q1: " + str(q1) + ", Q2: " + str(q2) + ", Q3: " + str(q3))
    if save_path:
        printDictToFile(ranks, save_path + '_rankedCompounds.txt')
        sortedRanks = sorted(ranks.values())
        printListToFile(sortedRanks, save_path + '_ranks.txt')
        logResult(q1, q2, q3, save_path + '_quartiles.txt')
    return q1, q2, q3, ranks
def load_model(model_name, epoch):
    from gensim.models import KeyedVectors
    from gensim.models.keyedvectors import FastTextKeyedVectors, Word2VecKeyedVectors
    from gensim.models.fasttext import load_facebook_vectors, load_facebook_model
    from gensim.models.wrappers import FastText
    if epoch != '50+10':
        # Epoch choice is 10 or 50 (no continued training with CSPC problem texts).
        if model_name.lower() == 'word2vec':
            return Word2VecKeyedVectors.load(
                f"trained_models/word2vec/idwiki.epoch-{epoch}.dim-300.kv")
        elif model_name.lower() == 'glove':
            return KeyedVectors.load_word2vec_format(
                f"trained_models/glove/converted.idwiki.epoch-{epoch}.dim-300.model.txt")
        elif model_name.lower() == 'fasttext':
            model = FastText.load_fasttext_format(
                f"trained_models/fasttext/idwiki.epoch-{epoch}.dim-300.bin")
            return model.wv
    else:
        # Epoch choice is 50+10, i.e. the 50-epoch word2vec model trained
        # further with CSPC problem texts.
        return Word2VecKeyedVectors.load(
            f"trained_models/word2vec/idwiki-cspc.epoch-50.dim-300.kv")
    return None
def convert_legacy_to_keyvec(legacy_w2v): dim = len(legacy_w2v[legacy_w2v.keys()[0]]) vectors = Word2VecKeyedVectors(dim) ws = [] vs = [] for word, vect in legacy_w2v.items(): ws.append(word) vs.append(vect) assert(len(vect) == dim) vectors.add(ws, vs, replace=True) return vectors
def load_word_vectors(key_vecs_file, weights_file):
    """Load w2v keyed vectors and lexicon into memory.

    :param key_vecs_file: path to keyed-vectors file
    :param weights_file: path to lexicon w2v file
    :return: keyed vectors, lexicon weights
    """
    logger.info("loading word2vec model...")
    wv = Word2VecKeyedVectors.load(key_vecs_file)
    weights = np.load(weights_file)
    return wv, weights
def load_word2vec_file(path):
    """Load from a word2vec file.

    Parameters:
        path (Path): The path to the word2vec file.

    Returns:
        WordEmbedding: The resulting word embedding.
    """
    # Open devnull for writing so gensim's progress output is discarded.
    with redirect_stderr(open(os.devnull, 'w')):
        gensim_obj = Word2VecKeyedVectors.load_word2vec_format(str(path))
    return WordEmbedding(
        gensim_obj=gensim_obj,
        source=path,
    )
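# Hedged usage sketch (hypothetical path): load_word2vec_file wraps the gensim
# loader in the WordEmbedding class whose __init__ appears further below.
def _demo_load_word2vec_file():
    from pathlib import Path
    emb = load_word2vec_file(Path("embeddings.txt"))
    print(emb.source, emb.keyed_vectors.vector_size)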
def distance_of_10_pairs_of_words(X: np.ndarray, word2vec_model: Word2VecKeyedVectors):
    speaker_offset = X.shape[0] // 2
    total_samples = 10
    speaker_1_random = X[np.random.choice(speaker_offset, total_samples, replace=False)]
    speaker_2_random = X[speaker_offset +
                         np.random.choice(speaker_offset, total_samples, replace=False)]
    for speaker_1, speaker_2 in zip(speaker_1_random, speaker_2_random):
        # Keep only in-vocabulary words so n_similarity does not raise.
        speaker_1 = [w for w in speaker_1.split() if w in word2vec_model]
        speaker_2 = [w for w in speaker_2.split() if w in word2vec_model]
        distance = word2vec_model.n_similarity(speaker_1, speaker_2)
        print("The distance between:")
        print(" ".join(speaker_1))
        print(" ".join(speaker_2))
        print("Distance: ", distance)
def __init__(self, threshold=0.5,
             word2vecpath="model/word_embedding/embedding.wv",
             datapath="news_ch_2_seg/7.json"):
    self.threshold = threshold
    # POS tags treated as stop words (likely jieba tags, e.g. numerals,
    # prepositions, particles).
    self.stopword2tag = {'m', 'p', 'x', 'c', 'uj', 'd', 'f', 'r', 'ul'}
    self.stopword2tag.add('a')
    self.word2vec = Word2VecKeyedVectors.load(word2vecpath)
    with open(datapath, 'r') as load_f:
        self.data = json.load(load_f)
    self.content, self.title, self.label = [], [], []
    self.Xtrain = None
    self.init()
    self.de_stopword()
    self.vectorize()
def __get_similarity_words__(embeddings: Word2VecKeyedVectors,
                             words: list, other_words: list) -> float:
    if len(words) == 0 or len(other_words) == 0:
        return 0
    summed_avgs = 0
    for w in words:
        dist = []
        for o_w in other_words:
            sim = embeddings.similarity(w, o_w)
            dist.append(sim)
        avg = sum(dist) / len(other_words)
        summed_avgs += avg
    return summed_avgs / len(words)
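# Hedged usage sketch (toy data): the score is the average over `words` of the
# mean cosine similarity to `other_words`.
def _demo_get_similarity_words():
    kv = Word2VecKeyedVectors(2)
    kv.add(["cat", "dog", "car"],
           np.array([[1.0, 0.1], [0.9, 0.2], [0.0, 1.0]], dtype=np.float32))
    print(__get_similarity_words__(kv, ["cat"], ["dog", "car"]))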
def load_legacy_w2v_as_keyvecs(w2v_file, dim=50):
    with open(w2v_file, 'r') as f:
        vectors = Word2VecKeyedVectors(dim)
        ws = []
        vs = []
        for line in f:
            vect = line.strip().rsplit()
            word = vect[0]
            vect = np.array([float(x) for x in vect[1:]])
            # Skip malformed lines whose dimensionality does not match.
            if dim == len(vect):
                ws.append(word)
                vs.append(vect)
        vectors.add(ws, vs, replace=True)
    return vectors
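# Hedged usage sketch (toy file): the expected input has one word per line
# followed by its whitespace-separated vector components.
def _demo_load_legacy_w2v_as_keyvecs():
    with open("legacy.vec", "w") as f:  # hypothetical file name
        f.write("cat 0.1 0.2\n")
        f.write("dog 0.2 0.1\n")
    kv = load_legacy_w2v_as_keyvecs("legacy.vec", dim=2)
    print(kv.most_similar("cat"))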
def model_to_csv(target_model):
    model_name = 'WORD2VEC_' + target_model + '.model'
    model = Word2VecKeyedVectors.load(os.path.join(CONFIG.EMBEDDING_PATH, model_name))
    vocab = list(model.vocab)
    vocab_list = [x for x in vocab]
    print("vocab length: ", len(vocab_list))
    # f_csv = open(DF_PATH+'Word2VecBlog300_5_min10_mecab.csv', 'w', encoding='utf-8-sig', newline='')
    print("started to write csv")
    csv_name = target_model + '.csv'
    f_csv = open(os.path.join(CONFIG.CSV_PATH, csv_name), 'w',
                 encoding='utf-8-sig', newline='')
    wr = csv.writer(f_csv)
    # One row per word: the word followed by its vector components.
    for voca in vocab_list:
        wr.writerow([voca] + model[voca].tolist())
    f_csv.close()
    print("completed to write csv")
def __init__(self, dimensions=None, gensim_obj=None, source=None):
    """Initialize a word embedding.

    At least one of dimensions and gensim_obj must be provided. If both are
    used, dimensions is ignored.

    Parameters:
        dimensions (int): The number of dimensions of the embedding.
        gensim_obj (gensim.Word2VecKeyedVectors): A gensim word embedding or
            related model.
        source (Path): The path of the source file.

    Raises:
        ValueError:
            If neither dimensions nor gensim_obj is provided.
            If dimensions is not a positive integer.
            If the word vectors in the gensim_obj cannot be determined.
    """
    if dimensions is None and gensim_obj is None:
        raise ValueError('one of dimensions or gensim_obj must be provided')
    if gensim_obj is None:
        # Parenthesize the check so non-int or non-positive values are rejected.
        if not (isinstance(dimensions, int) and dimensions > 0):
            raise ValueError('dimensions must be a positive integer')
        self.keyed_vectors = Word2VecKeyedVectors(dimensions)
    elif isinstance(gensim_obj, WordEmbeddingsKeyedVectors):
        if not hasattr(gensim_obj, 'save_word2vec_format'):
            raise ValueError(
                f'gensim_obj {type(gensim_obj)} does not have attribute "save_word2vec_format"'
            )
        self.keyed_vectors = gensim_obj
    elif isinstance(gensim_obj, BaseWordEmbeddingsModel):
        if not hasattr(gensim_obj, 'wv'):
            raise ValueError(
                f'gensim_obj {type(gensim_obj)} does not have attribute "wv"'
            )
        self.keyed_vectors = gensim_obj.wv
    else:
        raise ValueError(
            f'unable to determine word vectors in gensim object {gensim_obj}'
        )
    self.source = source
    # Forcefully normalize the vectors.
    self.keyed_vectors.vectors = normalize(self.keyed_vectors.vectors)
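# Hedged usage sketch: construct a WordEmbedding either empty by dimension or
# by wrapping an existing gensim keyed-vectors object (some_kv is hypothetical).
def _demo_word_embedding(some_kv):
    emb_empty = WordEmbedding(dimensions=100)
    emb_wrapped = WordEmbedding(gensim_obj=some_kv)
    print(emb_empty.keyed_vectors.vector_size,
          emb_wrapped.keyed_vectors.vector_size)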
def main():
    parser = argparse.ArgumentParser(
        description="Time how long it takes gensim to read an embedding file.")
    parser.add_argument("embedding", help="The path to the embeddings file to read")
    parser.add_argument("--format", required=True, choices=("binary", "text"))
    args = parser.parse_args()
    binary = True if args.format == "binary" else False
    tic = time.time()
    _ = Word2VecKeyedVectors.load_word2vec_format(args.embedding, binary=binary)
    toc = time.time()
    print(json.dumps({
        "file": args.embedding,
        "format": args.format,
        "time": toc - tic,
    }))
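# Hedged CLI sketch (hypothetical script and file names):
#
#   $ python time_embedding_load.py embeddings.bin --format binary
#   {"file": "embeddings.bin", "format": "binary", "time": 12.3}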
def parse_program(program: str, parser: Parser = None,
                  code2vec: Word2VecKeyedVectors = None) -> nx.DiGraph:
    if parser is None:
        parser: Parser = get_parser()
    tree = parser.parse(bytes(program, "utf8"))
    # Build an empty directed graph.
    g: nx.DiGraph = nx.DiGraph()
    queue: Queue = Queue()
    queue.put(tree.root_node)
    while not queue.empty():
        # Build the directed graph in breadth-first order.
        node = queue.get()
        if not hasattr(node, 'children'):
            continue
        # Connect each parent node to its children: add root-child edges.
        for child in node.children:
            g.add_edge(TreeSitterNode(node, program), TreeSitterNode(child, program))
            queue.put(child)
    # Embeddings are added to each node: initialize the graph nodes with
    # code2vec embedding representations.
    if code2vec is not None:
        zeros = np.zeros(code2vec.vector_size)
        for node in g.nodes:
            name = node.name.lower()
            if name in code2vec:
                g.add_node(node, data=code2vec.get_vector(name))
            else:
                g.add_node(node, data=zeros)
    return g
def __init__(self, model: str = "glove", aggregation: str = "average"):
    """Load pre-trained embeddings, either locally if model is a local file
    path or a Word2VecKeyedVectors object, or downloaded from the gensim API
    if a model name string is provided.
    """
    if aggregation not in {"average", "sum", "minmax"}:
        raise ValueError(
            f"Unknown embeddings aggregation mode: {aggregation}, the available "
            "ones are: average, sum, or minmax.")
    if isinstance(model, str):
        model = model.lower()
        if model in DEFAULT_PRETRAINED_EMBEDDINGS.keys():
            model_gensim_name = DEFAULT_PRETRAINED_EMBEDDINGS[model]
            self.model = api.load(model_gensim_name)
        elif model in api.info()["models"].keys():
            self.model = api.load(model)  # pragma: no cover
        elif os.path.exists(model):
            logger.info("Loading local model")
            self.model = Word2VecKeyedVectors.load(model)
            if not isinstance(self.model, Word2VecKeyedVectors):
                raise TypeError(
                    "The input model should be a Word2VecKeyedVectors object but "
                    f"it is a {type(self.model)} object.")
        else:
            raise KeyError(
                f"Unknown pre-trained model name: {model}. Available models are "
                + ", ".join(api.info()["models"].keys()))
        logger.info("Loaded model keyed vectors: " + model)
    elif isinstance(model, Word2VecKeyedVectors):
        self.model = model
        logger.info("Loaded model keyed vectors.")
    else:
        raise TypeError(
            "Input pre-trained model should be a string or a gensim "
            "Word2VecKeyedVectors object")
    self.aggregation = aggregation
    self.embedding_dimension = self.model.vector_size
    if self.aggregation == "minmax":
        # minmax concatenates per-dimension minima and maxima, doubling the size.
        self.embedding_dimension *= 2
def retrain():
    with app.app_context():
        temp = Projects.query.with_entities(Projects.title).all()
        titles = [i[0] for i in temp]
        temp = Projects.query.with_entities(Projects.abstract).all()
        abstracts = [i[0] for i in temp]
        msrcsv = 'MetaData/' + 'MSRTrainData.csv'
        leecsv = 'MetaData/' + 'LeeDocSimTrain.csv'
        tit_df = pd.read_csv(msrcsv, error_bad_lines=False)
        abs_df = pd.read_csv(leecsv, error_bad_lines=False)
        word_model = Word2VecKeyedVectors.load("MetaData/" + WORD_VEC_MODEL)
        new_words_list = []
        for index, row in tit_df.iterrows():
            for i in [row['Sentence1'], row['Sentence2']]:
                new_words_list.append(preprocess_string(remove_stopwords(i)))
        for index, row in abs_df.iterrows():
            for i in [row['Document1'], row['Document2']]:
                new_words_list.append(preprocess_string(remove_stopwords(i)))
        for i in titles:
            new_words_list.append(preprocess_string(remove_stopwords(i)))
        for i in abstracts:
            new_words_list.append(preprocess_string(remove_stopwords(i)))
        new_model = Word2Vec(new_words_list, size=DIMENSIONS, window=5,
                             min_count=1, workers=4)
        word_vecs = []
        words = []
        for lis in new_words_list:
            for word in lis:
                words.append(word)
                word_vecs.append(new_model.wv[word])
        # Merge the freshly trained vectors into the existing keyed vectors
        # without overwriting words that are already present.
        word_model.add(words, word_vecs, replace=False)
        word_model.save("MetaData/" + WORD_VEC_MODEL)
def train(path_corpus: str, fname: str, path_out_dir: str,
          term_ids: Set[int], doc_ids: Set[int]) -> str:
    """Train ELMo embeddings. This means averaging for context.

    ******
    IMPORTANT: At the moment no averaging is done! So the input embeddings
    are just returned as output embeddings!
    ******

    Args:
        path_corpus: The path to the text file used for training.
        fname: The filename for the embedding file.
        path_out_dir: The path to the output directory.
        term_ids: The set of current term-ids.
        doc_ids: The set of doc-ids making up the current subcorpus.
    Return:
        The path to the embedding file.
    """
    raw_path = 'embeddings/{}.vec'.format(fname)
    path_out = os.path.join(path_out_dir, raw_path)
    # *** tmp lines ***
    tmp_path_in = os.path.join(path_out_dir, 'embeddings/embs_token_ELMo_avg.pickle')
    averaged_embs = pickle.load(open(tmp_path_in, 'rb'))
    averaged_embs = {str(k): v for k, v in averaged_embs.items()}
    # *** tmp lines ***
    key = list(averaged_embs.keys())[0]
    vector_size = len(averaged_embs[key])
    m = Word2VecKeyedVectors(vector_size=vector_size)
    m.vocab = averaged_embs
    m.vectors = np.array(list(averaged_embs.values()))
    my_save_word2vec_format(binary=True, fname=path_out,
                            total_vec=len(averaged_embs),
                            vocab=m.vocab, vectors=m.vectors)
    return path_out
def learn(self, nx_g, mapping):
    g = RiWalkGraph.RiGraph(nx_g, self.args)
    walk_time, bfs_time, ri_time, walks_writing_time = g.process_random_walks()
    print('walk_time', walk_time / self.args.workers, flush=True)
    print('bfs_time', bfs_time / self.args.workers, flush=True)
    print('ri_time', ri_time / self.args.workers, flush=True)
    print('walks_writing_time', walks_writing_time / self.args.workers, flush=True)
    wv = self.learn_embeddings()
    # Map the learned vectors back to the original node ids.
    original_wv = Word2VecKeyedVectors(self.args.dimensions)
    original_nodes = list(mapping.keys())
    original_vecs = [wv.word_vec(str(mapping[node])) for node in original_nodes]
    original_wv.add(entities=list(map(str, original_nodes)), weights=original_vecs)
    return original_wv