Example #1
def test(config):
    import sent2vec
    assert config.sent2vec.model is not None, "Please add sent2vec_model config value."
    sent2vec_model = sent2vec.Sent2vecModel()
    sent2vec_model.load_model(config.sent2vec.model)

    output_fn_test = OutputFnTest(sent2vec_model, config)

    test_set = Dataloader(config, 'data/test_stories.csv', testing_data=True)
    test_set.load_dataset('data/test.bin')
    test_set.load_vocab('./data/default.voc', config.vocab_size)
    test_set.set_output_fn(output_fn_test)

    generator_testing = test_set.get_batch(config.batch_size,
                                           config.n_epochs,
                                           random=True)

    keras_model = keras.models.load_model(
        './builds/leonhard/2018-06-08 12:04:03-entailmentv6_checkpoint_epoch-85.hdf5'
    )

    verbose = 0 if not config.debug else 1

    # test_batch = next(generator_testing)
    print(keras_model.metrics_names)
    loss = keras_model.evaluate_generator(generator_testing,
                                          steps=len(test_set) //
                                          config.batch_size,
                                          verbose=verbose)
    print(loss)
Example #2
    def __init__(self, embedding_path=None):
        try:
            import sent2vec  # See https://github.com/epfml/sent2vec
        except ImportError:
            logging.warning('Module sent2vec was not found.')
            logging.warning('Please install using `python -m pip install cython; '
                            'python -m pip install git+https://github.com/epfml/sent2vec` '
                            'to use EmbedRank')
            return

        super(EmbedRank, self).__init__()

        if embedding_path is None:
            model_name = 'wiki_bigrams.bin'
            self._embedding_path = os.path.join(self._models, model_name)
        else:
            self._embedding_path = embedding_path

        if not os.path.exists(self._embedding_path):
            logging.error('Could not find {}'.format(self._embedding_path))
            logging.error('Please download the "sent2vec_wiki_bigrams" model from '
                          'https://github.com/epfml/sent2vec#downloading-sent2vec-pre-trained-models.')
            logging.error('And place it in {}.'.format(self._models))
            logging.error('Or provide an embedding path.')

        if EmbedRank._embedding_path is None or EmbedRank._embedding_path != self._embedding_path:
            logging.info('Loading sent2vec model')
            EmbedRank._embedding_model = sent2vec.Sent2vecModel()
            EmbedRank._embedding_model.load_model(self._embedding_path)
            self._embedding_model = EmbedRank._embedding_model
            EmbedRank._embedding_path = self._embedding_path
            logging.info('Done loading sent2vec model')

        # Initialize _pos here, if another selection function is used.
        self._pos = {'NOUN', 'PROPN', 'ADJ'}
Example #3
    def _buildBioSentVecEmbedding(self):
        if os.path.exists(self.sent_vec_embedding_filename + ".pkl"):
            with open(self.sent_vec_embedding_filename + ".pkl", "rb") as pickle_in:
                sent_vec_embedding = pickle.load(pickle_in)
            self.sent_vec_embedding = sent_vec_embedding
            return sent_vec_embedding

        model_path = "BioSentVec_PubMed_MIMICIII-bigram_d700.bin"
        model = sent2vec.Sent2vecModel()
        model.load_model(model_path)

        abstracts_dict = self.data.getAbstractsDict()

        sent_vec_embedding = {}

        for pmid in self.pmids:
            text = abstracts_dict[pmid]
            document_vector = np.zeros((NUM_SENT_VEC_FEATURES, ))
            for sentence in nltk.sent_tokenize(text):
                processed_sentence = self.__sentVecPreprocessSentence(sentence)
                document_vector += model.embed_sentence(
                    processed_sentence).reshape((NUM_SENT_VEC_FEATURES, ))
            sent_vec_embedding[pmid] = document_vector

        # Saving the embedding dictionary in a pickle file
        with open(self.sent_vec_embedding_filename + ".pkl", "wb") as f:
            pickle.dump(sent_vec_embedding, f)

        self.sent_vec_embedding = sent_vec_embedding

        return sent_vec_embedding
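The private helper __sentVecPreprocessSentence is referenced above but not shown. A plausible sketch, assuming it follows the preprocessing the BioSentVec authors recommend (lowercasing, tokenizing, dropping punctuation and stopwords); the project's actual implementation may differ:

import string
from nltk import word_tokenize
from nltk.corpus import stopwords

def sent_vec_preprocess_sentence(sentence):
    # Hypothetical stand-in for self.__sentVecPreprocessSentence: lowercase,
    # tokenize, and drop punctuation and stopwords before embed_sentence.
    stop_words = set(stopwords.words('english'))
    tokens = [t for t in word_tokenize(sentence.lower())
              if t not in string.punctuation and t not in stop_words]
    return ' '.join(tokens)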
Example #4
def getCategory(headline):
    model = sent2vec.Sent2vecModel()
    model.load_model('sent2vec/wiki_unigrams.bin')
    # headline = "You should protect your pets from disease."
    headline = headline.lower()
    categoryList = []

    with open('sent2vec/categories.txt', 'r') as categories:
        for line in categories:
            if line[0] != '\n':
                categoryList.append(line.rstrip('\n'))
    # print(str(categoryList))
    # embed headline
    emb = model.embed_sentence(headline)
    minDistCategory = None
    minDist = 1000000
    # embed categories and compare
    for i in range(len(categoryList)):
        times3category = (categoryList[i] + " ") * 3
        # print(times3category)
        # category embedding
        categoryEmb = model.embed_sentence(times3category)
        dist = distance.cosine(categoryEmb, emb)
        # print("Found cosine distance to category: " + categoryList[i] + " distance: " + str(dist))
        if dist < minDist:
            minDist = dist
            minDistCategory = categoryList[i]
    print("Category of headline: " + headline + " : " + minDistCategory)
    return minDistCategory
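getCategory reloads wiki_unigrams.bin on every call and embeds categories one at a time. A hedged sketch of the same lookup with the model loaded once and the categories embedded in a single embed_sentences call (the tripled-category trick is carried over from the original):

from scipy.spatial import distance
import sent2vec

_model = sent2vec.Sent2vecModel()
_model.load_model('sent2vec/wiki_unigrams.bin')  # loaded once, not per call

def get_category(headline, category_list):
    emb = _model.embed_sentence(headline.lower()).squeeze()
    # Embed all (tripled) category strings in one batched call.
    category_embs = _model.embed_sentences([(c + ' ') * 3 for c in category_list])
    dists = [distance.cosine(c_emb, emb) for c_emb in category_embs]
    return category_list[dists.index(min(dists))]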
Example #5
 def __init__(self, trainedModel='torontobooks_unigrams.bin'):
     print('Creating empty sent2vec model...')
     self.model = sent2vec.Sent2vecModel()  # keeps the trained model in memory
     print('Loading trained model. This might take a while...')
     self.model.load_model(trainedModel)
     print('Model %s loaded' % trainedModel)
Example #6
def test(config, testing_set):
    import sent2vec
    assert config.sent2vec.model is not None, "Please add sent2vec_model config value."
    sent2vec_model = sent2vec.Sent2vecModel()
    sent2vec_model.load_model(config.sent2vec.model)

    preprocess_fn = PreprocessTest(sent2vec_model)
    testing_set.set_preprocess_fn(preprocess_fn)

    output_fn_test = OutputFnTest(sent2vec_model, config)

    testing_set.set_output_fn(output_fn_test)

    generator_testing = testing_set.get_batch(config.batch_size,
                                              config.n_epochs,
                                              random=True)

    keras_model = keras.models.load_model(
        './builds/leonhard/2018-05-19 22:33:08-entailmentv2_checkpoint_epoch-1810.hdf5'
    )

    verbose = 0 if not config.debug else 1

    # test_batch = next(generator_testing)
    loss = keras_model.evaluate_generator(generator_testing,
                                          steps=len(testing_set) //
                                          config.batch_size,
                                          verbose=verbose)
    print(loss)
Example #7
def embeding(modelfile, data, matrix_file):
    if os.path.exists(matrix_file):
        return np.load(matrix_file)

    print("can not find pre-processed embedded file, start to embeding...")
    model = sent2vec.Sent2vecModel()
    model.load_model(modelfile)
    # emb = model.embed_sentence("once upon a time .")
    # data = list(data)
    embs = []
    for sentence in data:
        if isinstance(sentence, str):
            emb = model.embed_sentence(sentence)
            embs.append(emb)
        else:
            # Skip non-string entries; nothing is appended for them.
            print("not str:", sentence)
    embs = np.array(embs)
    embs = np.squeeze(embs)
    # print(np.shape(embs))
    # print(embs)
    np.save(matrix_file, embs)
    print("save to file: ", matrix_file)
    return embs
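A minimal usage sketch for embeding (the model and output file names are placeholders):

sentences = ["once upon a time .", "the end ."]
matrix = embeding('torontobooks_unigrams.bin', sentences, 'embeddings.npy')
print(matrix.shape)  # (2, embedding_dim) after np.squeeze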
Example #8
    def __init__(self, train_file, eval_file, output_dir, model_path,
                 force_create_emb, run_type, epochs):
        self.train_file = train_file
        self.eval_file = eval_file
        self.output_dir = output_dir
        self.model_path = model_path
        self.force_create_emb = force_create_emb
        self.biosentvec_model_path = "/mnt/nfs/work1/696ds-s20/kgunasekaran/sentvec/BioSentVec_PubMed_MIMICIII-bigram_d700.bin"
        self.biosentvec_model = sent2vec.Sent2vecModel()
        # hyperparameters to tune
        self.batch_size = 12
        self.learning_rate = 5e-4
        self.input_size = 1400
        self.hidden_size = 400
        self.output_size = 2
        self.unbalanced = 500
        self.epochs = epochs
        # end hyperparameters
        try:
            print("loading biosentvec model..")
            self.biosentvec_model.load_model(self.biosentvec_model_path)
            print('model successfully loaded')
        except Exception as e:
            print("EXCEPTION:", e)
        self.stop_words = set(stopwords.words('english'))
        self.bio_asq_data_train = []
        self.bio_asq_data_eval = []
        self.qid_pred = {}
        self.qid_pred_prob = {}
        self.qid_target = {}
Example #9
    def __init__(self, env, config):
        super().__init__()
        self.entity_vocab = None
        self.config = config
        self.symbol_vocab = None
        self.predicate_vocab = None
        self.env = env
        self.sent2vec = sent2vec.Sent2vecModel()
        self.sent2vec.load_model(config['embeddings'])

        self.train_symbol = config['train_symbol']
        self.train_predicate = config['train_predicate']
        self.train_entity = config['train_entity']

        self.symbol_embedding = None
        self.predicate_embedding = None
        self.entity_embedding = None

        self.proj_sym = Projection(config['pred_dim'])
        self.proj_ent = Projection(config['ent_dim'])
        self.proj_pred = Projection(config['pred_dim'])
        self.lambda_ = 1.0


        self.init_embeddings()
Example #10
 def __init__(self, model_name, is_static=False):
     self.is_static = is_static
     if is_static:
         self.model = sent2vec.Sent2vecModel()
         self.model.load_model(model_name)
     else:
         self.model = SentenceTransformer(model_name)
Example #11
def sentence_embedding(doc, sent_model=None):
    """
    params: sent_model: a loaded sent2vec model
    """
    if sent_model is None:
        # Note: a default-constructed Sent2vecModel has no weights loaded;
        # callers should pass a model on which load_model was already called.
        sent_model = sent2vec.Sent2vecModel()
    sentences = nltk.word_tokenize(doc)
    #re.sub("'t", 'ot', "n't, doesn't, can't, don't")
    res_sent = " ".join(item.lower() for item in sentences if item not in string.punctuation)
Example #12
    def __init__(self, dataset_dir, word2vec_dict, word2vec_dim):
        self.spacy_en = spacy.load('en')
        self.X = []
        self.Y = []
        self.max_sentences_in_text = 0
        self.model = sent2vec.Sent2vecModel()
        self.model.load_model(
            '/home/mauricio/repo/datasets/word_vectors/enwiki_sent2vec_100.bin'
        )

        self.files_list = os.listdir(os.path.abspath(dataset_dir))

        for idx, file in enumerate(self.files_list):
            print("{} Processing file {}".format(idx, file))
            sample = []
            with open(os.path.join(os.path.abspath(dataset_dir), file),
                      'r') as f:
                lines = f.read().split("\n")

                text = [
                    l.split(".")[1].strip().lower() for l in lines
                    if re.match('^STEP.*', l)
                ]
                sentences_count = len(text)

                if sentences_count > self.max_sentences_in_text:
                    self.max_sentences_in_text = sentences_count

            self.X.append(text)
            self.Y.append([1.0])
            self.X.append(text[::-1])
            self.Y.append([0.0])
        print("Maximum sentences in text: {}".format(
            self.max_sentences_in_text))
Example #13
    def __init__(self, modelPath):
        """Initialize the Sent2Vec model.

        Arguments:
            modelPath {str} -- the path to the model
        """
        self.encoder = sent2vec.Sent2vecModel()
        self.encoder.load_model(modelPath)
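A minimal usage sketch of this wrapper; "SentenceEncoder" stands in for the class name, which the excerpt does not show, and the model path is a placeholder:

encoder = SentenceEncoder('wiki_unigrams.bin')  # hypothetical class name
vec = encoder.encoder.embed_sentence('a sample sentence')
print(vec.shape)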
Example #14
  def getSentenceVector(doc, model_params: dict = None, encoder="distilbert", model_name='distilbert-base-nli-mean-tokens'):

    # Avoid sharing a mutable default dict across calls.
    if model_params is None:
      model_params = {}

    sp = spacy.load('en_core_web_sm')
    tokenized = sp(doc)
    sentences = []
    for token in tokenized.sents:
      sentences.append(token.text)

    if encoder in ['bert', 'xlnet', 'longformer', 'reformer', 'distilbert', 'roberta', 'bart']:
      # Use encoder for mapping tokens to embeddings
      word_embedding_model = models.Transformer(model_name,
                  tokenizer_args=model_params.get('tokenizer_args', {}))
      # Apply mean pooling to get one fixed sized sentence vector
      pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(),
                                     pooling_mode_mean_tokens=True,
                                     pooling_mode_cls_token=False,
                                     pooling_mode_max_tokens=False)
      model = SentenceTransformer(modules=[word_embedding_model, pooling_model])   
      sentence_embeddings = model.encode(sentences)
    

    elif encoder == 'use':
      #!pip install embedding-as-service
      from embedding_as_service.text.encode import Encoder
      en = Encoder(embedding='use', model='use_dan', max_seq_length=256)
      sentence_embeddings = en.encode(texts=sentences)


    elif encoder == 'infersent':
      import nltk
      nltk.download('punkt')
      from models import InferSent
      params_model = {'bsize': 64, 'word_emb_dim': 300, 'enc_lstm_dim': 2048,
                      'pool_type': 'max', 'dpout_model': 0.0, 'version': 2}
      infersent = InferSent(params_model)
      W2V_PATH = 'drive/My Drive/wiki-news-300d-1M.vec'
      infersent.set_w2v_path(W2V_PATH)
      infersent.build_vocab(sentences, tokenize=True)
      sentence_embeddings = infersent.encode(sentences, tokenize=True)


    elif encoder == 'sent2vec':
      import sent2vec
      model = sent2vec.Sent2vecModel()
      model.load_model('drive/My Drive/torontobooks_unigram.bin') 
      sentence_embeddings = model.embed_sentences(sentences)
   

    elif encoder == 'laser':
      from laserembeddings import Laser
      laser = Laser()  ## Also used for multilingual sentence embeddings
      sentence_embeddings = laser.embed_sentences(sentences, lang='en') 
  
  
    else:
      raise ValueError('Invalid or unavailable encoder: {}'.format(encoder))
  
    return list(zip(sentences, sentence_embeddings))
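A minimal usage sketch for the default distilbert path (the pretrained weights are downloaded on first use):

pairs = getSentenceVector("Cats purr. Dogs bark.", encoder='distilbert')
for sentence, embedding in pairs:
    print(sentence, embedding.shape)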
Example #15
def compute_sentence_embeddings_representation(data):
    import sent2vec
    sent2vec_model = sent2vec.Sent2vecModel()
    sent2vec_model.load_model('../DocAgg/lib/sent2vec/wiki_bigrams.bin')
    sentences = [' '.join(sample['tokens']) for sample in data]
    sentence_embeddings = sent2vec_model.embed_sentences(sentences)
    for i, sentence_embedding in enumerate(sentence_embeddings):
        data[i]['sentence_embeddings'] = sentence_embedding
    return data
Example #16
    def load_word_vec_model(self):
        if self.config.word_vector_model is None:
            print("No word vector model provided! Aborting ...")
            exit()

        print("Loading word vector model ...")
        self.model = sent2vec.Sent2vecModel()
        self.model.load_model(self.config.word_vector_model)
        print("Word vector loaded!")
Example #17
    def prepare(self, unique_texts):
        if self.model is None:
            import sent2vec
            self.model = sent2vec.Sent2vecModel()
            self.model.load_model(self.model_path)

        sentences_preprocessed = self.preprocess_all(unique_texts)
        vecs = self.model.embed_sentences(sentences_preprocessed)
        self.set_sen2vec(unique_texts, vecs)
Example #18
    def __init__(self, hparams):
        self.hparams = hparams
        self.load_sts_data()
        self.define_prepare_text()
        self.model = sent2vec.Sent2vecModel()
        self.model.load_model(hparams.model)

        if hparams.dim_subspace > 0:
            self.get_projection()
Example #19
def main(config):
    import sent2vec
    assert config.sent2vec.model is not None, "Please add sent2vec_model config value."
    sent2vec_model = sent2vec.Sent2vecModel()
    sent2vec_model.load_model(config.sent2vec.model)

    preprocess_fn = Preprocess(sent2vec_model)

    output_fn_test = OutputFnTest(sent2vec_model, config)

    train_set = SNLIDataloaderPairs('data/snli_1.0/snli_1.0_train.jsonl')
    train_set.set_preprocess_fn(preprocess_fn)
    train_set.set_output_fn(output_fn)

    test_set = Dataloader(config, 'data/test_stories.csv', testing_data=True)
    test_set.load_dataset('data/test.bin')
    test_set.load_vocab('./data/default.voc', config.vocab_size)
    test_set.set_output_fn(output_fn_test)
    # dev_set = SNLIDataloader('data/snli_1.0/snli_1.0_dev.jsonl')
    # dev_set.set_preprocess_fn(preprocess_fn)
    # dev_set.set_output_fn(output_fn)
    # test_set = SNLIDataloader('data/snli_1.0/snli_1.0_test.jsonl')

    generator_training = train_set.get_batch(config.batch_size,
                                             config.n_epochs)
    generator_dev = test_set.get_batch(config.batch_size, config.n_epochs)

    keras_model = model(config)

    verbose = 0 if not config.debug else 1
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Callbacks
    tensorboard = keras.callbacks.TensorBoard(log_dir='./logs/' + timestamp +
                                              '-entailmentv5/',
                                              histogram_freq=0,
                                              batch_size=config.batch_size,
                                              write_graph=False,
                                              write_grads=True)

    model_path = os.path.abspath(
        os.path.join(os.curdir, './builds/' + timestamp))
    model_path += '-entailmentv5_checkpoint_epoch-{epoch:02d}.hdf5'

    saver = keras.callbacks.ModelCheckpoint(model_path,
                                            monitor='val_loss',
                                            verbose=verbose,
                                            save_best_only=True)

    keras_model.fit_generator(generator_training,
                              steps_per_epoch=300,
                              epochs=config.n_epochs,
                              verbose=verbose,
                              validation_data=generator_dev,
                              validation_steps=len(test_set) //
                              config.batch_size,
                              callbacks=[tensorboard, saver])
Example #20
def senc_vector_model():
    model = sent2vec.Sent2vecModel()
    model.load_model(model_path)

    emb_1 = model.embed_sentence(sentence_1[0])
    emb_2 = model.embed_sentence(sentence_2[0])
    emb_3 = model.embed_sentence(sentence_3[0])

    cosine_dis(emb_1, emb_2)
    cosine_dis(emb_1, emb_3)
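The snippet assumes module-level model_path and sentence_* variables plus a cosine_dis helper, none of which are shown. A plausible cosine_dis, assuming it simply reports the cosine distance between two embeddings:

from scipy.spatial import distance

def cosine_dis(emb_a, emb_b):
    # Hypothetical helper: print and return the cosine distance
    # between two sent2vec embeddings.
    d = distance.cosine(emb_a.squeeze(), emb_b.squeeze())
    print(d)
    return d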
Example #21
 def __init__(self, model='sent2vec', pre_trained_model_path=''):
     self.s2v_model = None
     if model == 'sent2vec':
         try:
             self.s2v_model = sent2vec.Sent2vecModel()
             self.s2v_model.load_model(pre_trained_model_path)
             print('pretrained model {} successfully loaded'.format(
                 pre_trained_model_path))
         except Exception as e:
             print(e)
Example #22
def encode(all_sections, model_path=None, chunk=0, chunk_size=2500, model=None):
    logging.info('loading model...')
    if model is None:
        model = sent2vec.Sent2vecModel()
        try:
            model.load_model(model_path)
            logging.info('model successfully loaded')
        except Exception as e:
            print(e)

    stop_words = set(stopwords.words('english'))

    chunk_meta = []
    chunk_vecs = []

    sorted_keys = sorted(all_sections.keys())

    chunk_keys = sorted_keys[(chunk * chunk_size):((chunk + 1) * chunk_size)]

    logging.info('Running on keys %s...', str(chunk_keys[0:5]))

    def preprocess_sentence(text):
        text = text.replace('/', ' / ')
        text = text.replace('.-', ' .- ')
        text = text.replace('.', ' . ')
        text = text.replace('\'', ' \' ')
        text = text.lower()

        tokens = [token for token in word_tokenize(text) if token not in punctuation and token not in stop_words]

        return ' '.join(tokens)

    for k_idx, k in enumerate(chunk_keys):
        s_doc = time.time()
        logging.info('key %s (%s of %s) ', k, k_idx, len(chunk_keys))
        sentences = load_sents(all_sections, k)

        dim = model.get_emb_size()
        vectors = np.zeros((len(sentences), dim))
        gt = time.time
        t = gt()
        counter = 0
        for doc_id, sec_id, sentence_id, s in sentences:
            vectors[counter] = model.embed_sentence(preprocess_sentence(s))
            logging.log_every_n(logging.INFO, 'Processed %s sentences | %s seconds', 10, sentence_id, str(gt() - t))
            counter += 1
        e_t = gt()
        logging.info('Done! Processed %s Sentences | %s seconds', len(sentences), str(e_t - t))
        chunk_meta.extend(sentences)
        chunk_vecs.append(vectors)
        e_doc = time.time()
        logging.info('key %s (%s of %s)... %s seconds ', k, k_idx, len(chunk_keys), e_doc - s_doc)
    return chunk_vecs, chunk_meta
Example #23
def _sentvec_transform(sample, **kwargs):
    X, y = sample
    import sent2vec
    sentvec_model = kwargs.get('sentvec_model') or sent2vec.Sent2vecModel()
    if type(X[0]) is not str:
        # X is a list of token lists: embed each joined sentence.
        X = sentvec_model.embed_sentences([' '.join(x) for x in X])
    else:
        # X is a single token list: embed the one joined sentence.
        X = sentvec_model.embed_sentences([' '.join(X)])[0]
    mask = [1] * len(X)
    return [X, mask], y
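A hedged usage sketch; passing a preloaded model through kwargs matters, because the fallback Sent2vecModel() constructed above has no weights loaded (the model path is a placeholder):

import sent2vec

model = sent2vec.Sent2vecModel()
model.load_model('wiki_unigrams.bin')  # placeholder path
sample = ([['the', 'cat', 'sat'], ['the', 'dog', 'ran']], [0, 1])
(X, mask), y = _sentvec_transform(sample, sentvec_model=model)
print(X.shape, mask)  # (2, embedding_dim), [1, 1]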
Example #24
File: util.py  Project: jacklxc/DEC-keras
def loadBioSent2VecModel(model_path):
    """
    Load bioSent2VecModel, which is ~ 20 GB.
    Input: model_path
    Returns: model object
    """
    model = sent2vec.Sent2vecModel()
    try:
        model.load_model(model_path)
        print('Model successfully loaded!')
    except Exception as e:
        print(e)
    return model
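A minimal usage sketch (the path is a placeholder):

model = loadBioSent2VecModel('BioSentVec_PubMed_MIMICIII-bigram_d700.bin')
# embed_sentence returns a 2-D array, as the reshape in Example #3 suggests.
vec = model.embed_sentence('breast cancer screening guidelines')
print(vec.shape)  # (1, 700) for the d700 model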
Example #25
 def __init__(self, cfg):
     super(BioSentVec, self).__init__()
     self.cfg = cfg
     checkpoint = cfg.model.checkpoint
     if not os.path.exists(checkpoint):
         if cfg.model.checkpoint_download:
             download_ressource(
                 checkpoint,
                 'https://ftp.ncbi.nlm.nih.gov/pub/lu/Suppl/BioSentVec/BioSentVec_PubMed_MIMICIII-bigram_d700.bin'
             )
         else:
             raise FileNotFoundError(checkpoint)
     self.model = sent2vec.Sent2vecModel()
     self.model.load_model(checkpoint)
Example #26
def main(config, training_set, testing_set):
    import sent2vec
    assert config.sent2vec.model is not None, "Please add sent2vec_model config value."
    sent2vec_model = sent2vec.Sent2vecModel()
    sent2vec_model.load_model(config.sent2vec.model)

    preprocess_fn = Preprocess(sent2vec_model)

    training_set.set_preprocess_fn(preprocess_fn)
    testing_set.set_preprocess_fn(preprocess_fn)

    training_set.set_output_fn(output_fn_train)
    testing_set.set_output_fn(output_fn_test)

    generator_training = training_set.get_batch(config.batch_size,
                                                config.n_epochs,
                                                random=True)
    generator_testing = testing_set.get_batch(config.batch_size,
                                              config.n_epochs,
                                              random=True)

    cloze_model = keras_model(config)

    verbose = 0 if not config.debug else 1
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Callbacks
    tensorboard = keras.callbacks.TensorBoard(log_dir='./logs/scheduler-' +
                                              timestamp + '/',
                                              histogram_freq=0,
                                              batch_size=config.batch_size,
                                              write_graph=False,
                                              write_grads=True)

    model_path = os.path.abspath(
        os.path.join(os.curdir, './builds/' + timestamp))
    model_path += '-scheduler_checkpoint_epoch-{epoch:02d}.hdf5'

    saver = keras.callbacks.ModelCheckpoint(model_path,
                                            monitor='val_loss',
                                            verbose=verbose,
                                            save_best_only=True)

    cloze_model.fit_generator(
        generator_training,
        steps_per_epoch=len(training_set) // config.batch_size,
        epochs=config.n_epochs,
        verbose=verbose,
        validation_data=generator_testing,
        validation_steps=len(testing_set) // config.batch_size,
        callbacks=[tensorboard, saver])
Example #27
    def __init__(self, file_name, sim_thresh, sim_type):
        self.file_name = file_name
        self.sim_type = sim_type
        #self.model_path = "/mnt/nfs/work1/696ds-s20/kgunasekaran/sentvec/BioSentVec_PubMed_MIMICIII-bigram_d700.bin"
        self.model = sent2vec.Sent2vecModel()
        self.batch_size = 12
        try:
            self.model.load_model(self.model_path)
            print('model successfully loaded')
        except Exception as e:
            print("EXCEPTION:", e)
        self.stop_words = set(stopwords.words('english'))
        self.bio_asq_data = []
Example #28
def sent_embedding(text):
    lang = detect(text)
    text_pre = preprocessText(text)
    sentences = sent_tokenize(text_pre)
    model = sent2vec.Sent2vecModel()
    _os_path = "/home/thangnd/git/python/NLP_20182/text-summarizer-demo/web/models/"
    if lang == 'en':
        path = _os_path + "wiki_unigrams.bin"  # load the model for English
    elif lang == 'vi':
        path = _os_path + "my_model.bin"  # load the model for Vietnamese
    else:
        return 0
    model.load_model(path)
    embs = model.embed_sentences(sentences)
    return embs, sentences
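A minimal usage sketch; sent_embedding returns 0 for unsupported languages, so callers should check before unpacking:

result = sent_embedding("Protect your pets from disease. Vaccinate them early.")
if result != 0:
    embs, sentences = result
    print(len(sentences), embs.shape)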
Example #29
    def create_chitchat_bot(self):
        """Initializes self.chitchat_bot with some conversational model."""

        # Hint: you might want to create and train a chatterbot.ChatBot here.
        # It could be done by creating a ChatBot with the *trainer* parameter equal to
        # "chatterbot.trainers.ChatterBotCorpusTrainer"
        # and then calling the *train* function with the "chatterbot.corpus.english" param.
        
        ########################
        #### YOUR CODE HERE ####
        
        ################# RHT_Conv_Bot #####################

        sent2vec_model = sent2vec.Sent2vecModel()
        sent2vec_model.load_model(self.conv_model)

        print("chitchat_bot created.")
        
        return sent2vec_model
Example #30
    def fit(self, *_):

        try:
            import sent2vec
        except ImportError:
            from wellcomeml.__main__ import download

            download("non_pypi_packages")
            import sent2vec

        if self.pretrained:
            model_path = check_cache_and_download(self.pretrained)
            self.model = sent2vec.Sent2vecModel()
            self.model.load_model(model_path)
        else:
            # Custom training not yet implemented
            raise NotImplementedError(
                "Fit only implemented for loading pretrained models")
        return self