def read_context2vec(model_param_file, gpu):
    """Load a pretrained context2vec model and return its usable parts.

    Args:
        model_param_file: path to the model's .params file.
        gpu: device id forwarded to ModelReader.

    Returns:
        Tuple of (target embeddings as an ``xp`` array, index->word list,
        word->index dict, model object).
    """
    reader = ModelReader(model_param_file, gpu)
    # NOTE(review): `xp` is assumed to be a module-level numpy/cupy alias — confirm.
    embeddings = xp.array(reader.w)
    return embeddings, reader.index2word, reader.word2index, reader.model
def get_ctx2vec_models(cls):
    """Load the bundled context2vec model shipped with the repository.

    The parameter file is resolved relative to this source file at
    ``../../systems/context2vec/model.params``.
    """
    import os.path
    from context2vec.common.model_reader import ModelReader

    params_path = os.path.join(
        os.path.dirname(__file__),
        "..", "..", "systems", "context2vec", "model.params",
    )
    return ModelReader(params_path).model
def load_c2v_model(model_param_file):
    """Read a pretrained context2vec model from *model_param_file*.

    Returns:
        A 4-element list: [model, embeddings, word2index, index2word].
    """
    reader = ModelReader(model_param_file)
    return [reader.model, reader.w, reader.word2index, reader.index2word]
def __init__(self, model_param_file: str, output_file_path: str, index_output_file_path: str, gpu: int, batch_size: int):
    """Prepare batching state, load the context2vec model, and open outputs.

    Args:
        model_param_file: path to the context2vec .params file.
        output_file_path: HDF5 file opened here for writing.
        index_output_file_path: path where the index mapping is written later.
        gpu: device id forwarded to ModelReader.
        batch_size: number of items processed per batch.
    """
    reader = ModelReader(model_param_file, gpu)
    self.model = reader.model
    self.word2index = reader.word2index
    self.sentence_length_batches = SentenceLength2WordIndices()
    self.batch_size = batch_size
    # Held open for the object's lifetime; closed elsewhere when the dump ends.
    self.output_h5_file = h5py.File(output_file_path, 'w')
    self.index_output_file_path = index_output_file_path
    self.index_out = defaultdict(list)
def ReadModelC2Vec(model_param_file):
    """Read a pretrained context2vec model.

    Args:
        model_param_file: path to the pretrained model's .param file.

    Returns:
        (model, embeddings, word2index, index2word) — the objects needed
        to use context2vec.
    """
    reader = ModelReader(model_param_file)
    return reader.model, reader.w, reader.word2index, reader.index2word
def __init__(self, lstm_model_params_filename, ignore_target, context_math, top_inferences_to_analyze):
    """Initialise the inferrer with a pretrained context2vec LSTM model.

    Args:
        lstm_model_params_filename: path to the model's .params file.
        ignore_target: presumably controls whether the target word itself is
            used during inference — confirm against the inference code.
        context_math: opaque flag controlling how context is combined.
        top_inferences_to_analyze: number of top inferences kept for analysis.
    """
    CsInferrer.__init__(self)
    reader = ModelReader(lstm_model_params_filename)
    self.context_model = reader.model
    self.target_words = reader.w
    self.word2index = reader.word2index
    self.index2word = reader.index2word
    self.ignore_target = ignore_target
    self.context_math = context_math
    self.top_inferences_to_analyze = top_inferences_to_analyze
def register_model(self):
    """Create and register the model on first use; otherwise bump its refcount.

    Models are shared through the class-level ``loaded`` registry so that two
    estimators configured with the same model name reuse one in-memory copy —
    e.g. when combining model prediction with embedding similarity, the same
    model is not loaded into memory twice.
    """
    registry = Context2VecProbEstimator.loaded
    # Membership test directly on the dict; the original `.keys()` call was redundant.
    if self.model_name not in registry:
        model_reader = ModelReader(
            self.get_model_params_path(self.model_name))
        # ref_count starts at 1 for the estimator doing the initial load.
        registry[self.model_name] = {
            "model": model_reader.model.context2vec,
            "word2id": model_reader.word2index,
            "embeddings": model_reader.w,
            "ref_count": 1,
        }
    else:
        registry[self.model_name]["ref_count"] += 1
def correct_and_save_tweets(dico, c2v_model_path, input_path, output_path, fast=True, limit=10):
    """Correct a batch of tweets and write one corrected tweet per line.

    Args:
        dico: word set/dictionary (unused in this function; presumably consumed
            by ``correct_tweet`` through module state — TODO confirm).
        c2v_model_path: path to the context2vec .params file.
        input_path: file containing the raw tweets.
        output_path: destination file for the corrected tweets.
        fast: forwarded to ``correct_tweet``.
        limit: maximum number of tweets to process (was hard-coded to 10).
    """
    # The reader is constructed for its loading side effects; the handles it
    # exposes (w, word2index, ...) were assigned but never used originally.
    ModelReader(c2v_model_path)
    tweets = read_content(input_path)[:limit]
    # `with` guarantees the output file is closed even if correct_tweet raises;
    # the original left the file open on any exception.
    with open(output_path, 'w') as out:
        for tweet in tweets:
            out.write(correct_tweet(tweet, fast) + '\n')
def main(args):
    """Entry point: load the model, parse the input sentence, score candidates."""
    reader = ModelReader(gpu=0, config_file=args.model_file)
    sentence, target_position = parse_input(args.inputs)
    evaluate_candidates(reader, sentence, target_position)
if __name__ == '__main__':
    debug = False
    # Four positional arguments are required (argv[1]..argv[4]), so argv needs
    # at least 5 entries. The original check `< 4` let a missing
    # model-params path slip past the usage message into an IndexError below.
    if len(sys.argv) < 5:
        sys.stderr.write(
            "Usage: %s <questions-filename> <gold-filename> <results-filename>"
            " <model-params-filename>\n" % sys.argv[0])
        sys.exit(1)
    questions_fd = open(sys.argv[1], 'r', encoding='utf-8')
    gold_fd = open(sys.argv[2], 'r', encoding='utf-8')
    results_fd = open(sys.argv[3], 'w', encoding='utf-8')
    model_params_filename = sys.argv[4]
    model_reader = ModelReader(model_params_filename)
    total_questions = 0
    correct = 0
    # Answer questions until either the question or the gold stream runs out.
    while True:
        best_answer = answer_next_question(questions_fd, model_reader.model,
                                           model_reader.w, model_reader.word2index)
        gold_answer = read_next_answer(gold_fd, model_reader.word2index)
        if best_answer is None or gold_answer is None:
            break
        total_questions += 1
        if best_answer == gold_answer:
            correct += 1
    # NOTE(review): raises ZeroDivisionError when zero questions were read — confirm intended.
    accuracy = float(correct) / total_questions
# Interactive context2vec explorer (Python 2 — note the `print >>` statement).
if len(sys.argv) < 2:
    print >> sys.stderr, "Usage: %s <model-param-file>" % (sys.argv[0])
    sys.exit(1)
model_param_file = sys.argv[1]
n_result = 10  # number of search result to show
gpu = -1  # todo: make this work with gpu
if gpu >= 0:
    cuda.check_cuda_available()
    cuda.get_device(gpu).use()
# xp dispatches array ops to cupy on GPU, numpy otherwise.
xp = cuda.cupy if gpu >= 0 else numpy
model_reader = ModelReader(model_param_file)
w = model_reader.w
word2index = model_reader.word2index
index2word = model_reader.index2word
model = model_reader.model
# Read-eval loop: parse a sentence with a marked target position.
while True:
    try:
        line = six.moves.input('>> ')
        sent, target_pos = parse_input(line)
        if target_pos == None:
            raise ParseException("Can't find the target position.")
        if sent[target_pos] == None:
            target_v = None
        elif sent[target_pos] not in word2index:
            # NOTE(review): snippet is truncated at this point in this view.
# Variant of the interactive explorer; the usage message is commented out.
if len(sys.argv) < 2:
    # print >> sys.stderr, "Usage: %s <model-param-file>" % (sys.argv[0])
    sys.exit(1)
model_param_file = sys.argv[1]
n_result = 10  # number of search result to show
gpu = -1  # todo: make this work with gpu
if gpu >= 0:
    cuda.check_cuda_available()
    cuda.get_device(gpu).use()
# xp dispatches array ops to cupy on GPU, numpy otherwise.
xp = cuda.cupy if gpu >= 0 else numpy
# NOTE(review): `ukwac.params` looks like an attribute access on an object named
# `ukwac`; model_param_file above is unused — verify this is intentional.
model_reader = ModelReader(ukwac.params)
w = model_reader.w
word2index = model_reader.word2index
index2word = model_reader.index2word
model = model_reader.model
# Read-eval loop: parse a sentence with a marked target position.
while True:
    try:
        line = six.moves.input('>> ')
        sent, target_pos = parse_input(line)
        if target_pos == None:
            raise ParseException("Can't find the target position.")
        if sent[target_pos] == None:
            target_v = None
        elif sent[target_pos] not in word2index:
            # NOTE(review): snippet is truncated at this point in this view.
# NOTE(review): this close() belongs to a definition above this excerpt.
f.close()

if __name__ == '__main__':
    # Load corpus to create the dictionary of known words.
    print("Loading brown corpus...")
    nltk.download('brown')
    from nltk.corpus import brown
    dico = set(brown.words())
    print("Brown corpus loaded!")
    # Load the pretrained context2vec model from argv[1].
    print("Loading context2vec Model...")
    c2v_model_path = sys.argv[1]
    model_reader = ModelReader(c2v_model_path)
    w = model_reader.w
    word2index = model_reader.word2index
    index2word = model_reader.index2word
    model = model_reader.model
    print("Context2vec Model loaded!")
    # Load tweets from argv[2]; only the first 1000 are processed.
    print("Loading tweets...")
    input_path = sys.argv[2]
    tweets = read_content(input_path)
    tweets = tweets[:1000]
    print("Tweets loaded!")
    # Use the fast method to compute propositions before language model.
    # NOTE(review): argv[3] is skipped here (presumably an output path used
    # later) and argv length is never checked — confirm against the full script.
    fast = sys.argv[4].lower() == 'true'
def read_context2vec_model(context2vec_param_file, gpu):
    """Thin wrapper: build and return a ModelReader for the given params file."""
    return ModelReader(context2vec_param_file, gpu)