示例#1
0
文件: main.py 项目: C0d1red/MIREA
import numpy as np
import random
from rnn import RNN
from data import train_data, test_data

# Build the vocabulary: every distinct space-separated word in the training texts
vocab = list({word for sentence in train_data.keys() for word in sentence.split(' ')})
vocab_size = len(vocab)
print('Уникальных слов в тренеровочных данных: {}'.format(vocab_size))

# Lookup tables between each vocabulary word and its position in `vocab`
word_to_idx = dict(zip(vocab, range(vocab_size)))
idx_to_word = dict(enumerate(vocab))


def create_inputs(text):
    """Return a list of one-hot column vectors, one per word of `text`.

    `text` is split on single spaces. Each word becomes a NumPy array of
    shape (vocab_size, 1) filled with zeros except for a 1 at the row
    given by `word_to_idx[word]`. A word missing from `word_to_idx`
    raises KeyError.
    """
    def one_hot(word):
        # Start from an all-zero column and switch on the word's row.
        column = np.zeros((vocab_size, 1))
        column[word_to_idx[word]] = 1
        return column

    return [one_hot(word) for word in text.split(' ')]


def softmax(xs):
    """Return the softmax of `xs`, normalized along the first axis.

    Args:
        xs: Array-like of scores. A 1-D input is normalized as a whole;
            for inputs with more dimensions each column along axis 0
            is normalized independently.

    Returns:
        A NumPy array with the same shape as `xs` whose entries along
        axis 0 are positive and sum to 1. An empty input yields an
        empty array.
    """
    xs = np.asarray(xs)
    if xs.size == 0:
        # Nothing to normalize; np.max would raise on an empty array.
        return np.exp(xs)
    # Subtracting the maximum does not change the mathematical result but
    # keeps np.exp from overflowing to inf, which would turn the ratio
    # into NaN for large scores.
    exps = np.exp(xs - np.max(xs, axis=0))
    return exps / np.sum(exps, axis=0)


# Initialize the RNN
# NOTE(review): the arguments are presumably (input size, output size), with
# the input size matching the one-hot vector length — confirm against rnn.RNN.
rnn = RNN(vocab_size, 2)
import numpy as np
import random
from rnn import RNN
from data import train_data, test_data
import random

# Collect every distinct space-separated word that occurs in the training phrases.
vocab = list({word for phrase in train_data.keys() for word in phrase.split(" ")})
vocab_size = len(vocab)
"""
assign integer index to represent each word in vocab
need to represent each word with an index bc RNNs can't understand words
we have to give them numbers
"""
word_to_idx = dict(zip(vocab, range(vocab_size)))
idx_to_word = dict(enumerate(vocab))
"""
xi input to RNN is a vector. can use one-hot encoding
we have 18 unique words in the vocabulary, so each xi will be 18 dimensional one-hot vector

create_inputs returns array of one-hot vectors that represent the words in the input text string
"""


def create_inputs(text):
    res = []
    for word in text.split(' '):
        vec = np.zeros((vocab_size, 1))  #start off as array of zeros
        vec[word_to_idx[word]] = 1
        res.append(vec)